# Please note this is all the raw R code pulled from all the `.Rmd` files.
# This code is for the 16s rRNA data only since the metagenomic analyses are a mix
# of anvio commands and R-code
# Use at your own risk as this has not been tested independent of the Rmarkdown
# workflows. In other words, the code works in the Rmarkdown format but the
# complete pipeline has not been tested using just this code. I used `knitr::purl()`
# to pull out the R code from the Rmarkdown file. I did this for you in case you
# wanted the code and not hear me drone on about colors or zoomable figures.
# Commands that are commented out are things I tried that I could never get to work.
# Any line that starts like this: `## ----` is the code chuck name and details.
#############################
##
## Front Page
##
#############################
#### ---- setup, include=FALSE------------------------------------------------------
knitr::opts_chunk$set(echo = FALSE)
library("tm")
library("SnowballC")
library("wordcloud")
library("RColorBrewer")
library("wordcloud2")
library("webshot")
library("htmlwidgets")
library(dplyr)
library(leaflet)
library(htmlwidgets)
#### ---- echo=FALSE, warning=FALSE, layout='l-screen'-----------------------------
knitr::include_graphics("figures/index/banner.png")
#### ---- layout='l-screen-inset shaded', fig.height=5.5---------------------------
#stri_icon <- makeIcon(
# iconUrl = "assets/stri_logo.png",
# iconWidth = 38, iconHeight = 38
# iconAnchorX = 22, iconAnchorY = 94,
#)
leaflet() %>%
setView(-82.235052, 9.277137, zoom = 12) %>%
addTiles() %>%
addScaleBar(position = "topright") %>%
# addMiniMap(position = "topright", zoomLevelOffset = -5) %>%
addCircleMarkers(lng = c(-82.334720, -82.131119),
lat = c(9.219640, 9.248433), radius = 22.5,
label = c("Cayo Roldan (impacted)", "Coral Caye (control)"),
labelOptions = labelOptions(textOnly = TRUE,
noHide = TRUE,
direction = "bottom", offset = c(6, 12),
style = list("font-size" = "14px" ,
"font-style" = "bold")),
color = c("#141414", "#141414"),
fillColor = c("#D55E00", "#0072B2"),
stroke = TRUE, fillOpacity = 0.9, weight = 10) %>%
addCircleMarkers(lng = -82.256463,
lat = 9.352402,
label = "STRI",
labelOptions = labelOptions(textOnly = TRUE,
noHide = TRUE,
direction = "left", offset = c(-20, 1),
style = list("font-size" = "20px" ,
"font-style" = "bold")),
color = "#0072B2",
fillColor = "#F0E442",
stroke = TRUE, fillOpacity = 0.95, weight = 5,
opacity = 0.95)
#### ---- echo=FALSE, eval=FALSE---------------------------------------------------
filePath <- "files/main.txt"
text <- readLines(filePath)
docs <- Corpus(VectorSource(text))
inspect(docs)
toSpace <- content_transformer(function (x , pattern ) gsub(pattern, " ", x))
docs <- tm_map(docs, toSpace, "/")
docs <- tm_map(docs, toSpace, "@")
docs <- tm_map(docs, toSpace, "\\|")
#### ---- echo=FALSE, eval=FALSE---------------------------------------------------
# Convert the text to lower case
docs <- tm_map(docs, content_transformer(tolower))
# Remove numbers
docs <- tm_map(docs, removeNumbers)
# Remove english common stopwords
docs <- tm_map(docs, removeWords, stopwords("english"))
# Remove your own stop word
# specify your stopwords as a character vector
docs <- tm_map(docs, removeWords, c("used", "based", "samples",
"using", "sample", "one",
"two", "site", "within", "fig"))
# Remove punctuations
docs <- tm_map(docs, removePunctuation)
# Eliminate extra white spaces
docs <- tm_map(docs, stripWhitespace)
# Text stemming
docs <- tm_map(docs, stemDocument)
#### ---- echo=FALSE, eval=FALSE---------------------------------------------------
dtm <- TermDocumentMatrix(docs)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing=TRUE)
d <- data.frame(word = names(v),freq = v)
head(d, 200)
#### ---- echo=FALSE, eval=FALSE---------------------------------------------------
set.seed(1234)
d1 <- d %>% filter(freq > 1)
#wordcloud(words = d$word, freq = d$freq, min.freq = 1, scale=c(2,.5),
# max.words=200, random.order=FALSE, rot.per=0.35,
# colors=brewer.pal(8, "Dark2"))
wc <- wordcloud2(data = d1, size = 1.1, color = 'random-dark', fontFamily = "serif", gridSize = 0)
saveWidget(wc, "tmp.html", selfcontained = F)
webshot("tmp.html", "wordcloud.png", delay = 10, vwidth = 1080, vheight = 480)
#### ---- echo=FALSE, warning=FALSE, fig.height=1.5, layout='l-screen'-------------
knitr::include_graphics("figures/index/wordcloud.png")
#############################
##
## Site Overview
##
#############################
#### ---- setup, include=FALSE------------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(DT)
library(ggplot2)
library(scales)
library(magick)
library(gridExtra)
library(grid)
options(scipen=999)
#### ---- color_121, layout="l-body-outset", comment='', include=FALSE-------------
# Wong palette
wong_pal <- c("#009E73", "#D55E00", "#F0E442",
"#CC79A7", "#56B4E9", "#E69F00",
"#0072B2", "#7F7F7F", "#000000")
palette(wong_pal)
gs1 <- lapply(wong_pal, function(ii, wong_pal)
grobTree(
rectGrob(
gp = gpar(fill = ii)),
textGrob(ii, gp = gpar(col = "white", fontsize = 10))))
gs1 <- grid.arrange(grobs = gs1, ncol = 3,
top = "8-COLOR PALETTE", right = "")
# 12 color palette
twelve_pal <- c("#323232", "#BF3465", "#50B29E", "#D9D9D9",
"#731683", "#1C6CCC", "#21BCFF", "#DFA5E5",
"#874310", "#DB6D1B", "#B8CE17", "#F4E345")
palette(twelve_pal)
gs2 <- lapply(twelve_pal, function(ii, twelve_pal)
grobTree(
rectGrob(
gp = gpar(fill = ii)),
textGrob(ii, gp = gpar(col = "white", fontsize = 10))))
gs2 <- grid.arrange(grobs = gs2, ncol = 4,
top = "12-COLOR PALETTE", left = "")
#### ---- color_8, layout="l-body-outset", comment='', echo=FALSE, fig.height=3----
grid.arrange(gs1, gs2, ncol = 2)
#############################
##
## Field Analyses
##
#############################
#### ---- setup, include = FALSE----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
rm(list = ls())
library(lme4)
library(lmerTest)
library(car)
library(rcompanion)
library(MASS)
library(emmeans)
library(nlme)
library(ggplot2)
library(plyr)
library(RColorBrewer)
library(wesanderson)
library(reshape2) # restructuring the data
library(cowplot)
library(scales)
library(tidyr)
library(dplyr)
library(patchwork)
#### ---- --------------------------------------------------------------------------
data <- read.table("tables/field/coralhypoxia.csv", header = T, sep = ",")
attach(data)
#### ---- --------------------------------------------------------------------------
plotNormalHistogram(yield, main = "PAM", xlab = "Yield")
#### ---- --------------------------------------------------------------------------
lmepam <- lmer(yield ~ site + (1|colony), REML = FALSE, na.action = na.exclude)
res1 <- resid(lmepam)
#### ---- eval=FALSE, include=FALSE------------------------------------------------
hist(res1, main = "PAM", xlab = "Residuals")
plotNormalHistogram(res1)
#### ---- --------------------------------------------------------------------------
plotNormalHistogram(res1, main = "PAM", xlab = "Residuals")
#### ---- --------------------------------------------------------------------------
pam.shapiro <- shapiro.test(res1)
print(pam.shapiro)
#### ---- --------------------------------------------------------------------------
qqnorm(res1, ylab = "Sample Quantiles for PAM")
qqline(res1, col = "red")
#### ---- --------------------------------------------------------------------------
leveneTest(yield ~ as.factor(site), data = data)
bartlett.test(x = yield, g = site)
#### ---- --------------------------------------------------------------------------
pam1 <- lmer(yield ~ site + (1|colony/frag), data = data,
REML = FALSE, na.action = na.exclude)
ranova(pam1)
#### ---- --------------------------------------------------------------------------
pam2 <- lmer(yield ~ site + (1|colony), data = data,
REML = FALSE, na.action = na.exclude)
ranova(pam2)
#### ---- --------------------------------------------------------------------------
mod.AIC <- AIC(pam1, pam2)
mod.AIC
#### ---- --------------------------------------------------------------------------
finalpam <- lmer(yield ~ site + (1|colony),
data = data,
REML = TRUE, na.action = na.exclude)
#### ---- --------------------------------------------------------------------------
finalrandpam <- ranova(finalpam)
pam.table <- anova(finalpam, type = 2)
#### ---- --------------------------------------------------------------------------
pam.table
#### ---- --------------------------------------------------------------------------
finalrandpam
#### ---- --------------------------------------------------------------------------
attach(data)
plotNormalHistogram(zoox, main = "Zooxanthellae", xlab = "Counts")
#### ---- --------------------------------------------------------------------------
lmezoox <- lmer(zoox ~ site + (1|colony), REML = FALSE, na.action = na.exclude)
res2 <- resid(lmezoox)
#### ---- include=FALSE, eval = FALSE----------------------------------------------
hist(res2, main = "Zoox", xlab = "Residuals")
plotNormalHistogram(res2, main = "Zooxanthellae", xlab = "Residuals")
#### ---- --------------------------------------------------------------------------
plotNormalHistogram(res2, main = "Zooxanthellae", xlab = "Residuals")
#### ---- --------------------------------------------------------------------------
zoox.shapiro <- shapiro.test(res2) #runs a normality test on residuals
print(zoox.shapiro) # null = normally distrubuted (P<0.05 = non-normal
#### ---- --------------------------------------------------------------------------
qqnorm(res2, ylab = "Sample Quantiles for Zoox")
qqline(res2, col = "red")
#### ---- --------------------------------------------------------------------------
leveneTest(zoox~ as.factor(site),data = data) # p = 0.0007486
#### ---- --------------------------------------------------------------------------
data$zoox_log = log(zoox)
attach(data)
colnames(data)
#### ---- --------------------------------------------------------------------------
plotNormalHistogram(zoox_log, main = "Zooxanthellae", xlab = "log (counts)")
#### ---- --------------------------------------------------------------------------
lmezoox_log <- lmer(zoox_log ~ site + (1|colony),
REML = FALSE,
na.action = na.exclude)
res3 <- resid(lmezoox_log)
#### ---- include=FALSE, eval = FALSE----------------------------------------------
hist(res3, main = "zoox log", xlab = "Residuals")
plotNormalHistogram(res3, main = "Zooxanthellae log", xlab = "Residuals")
#### ---- --------------------------------------------------------------------------
plotNormalHistogram(res3, main = "Zooxanthellae log", xlab = "Residuals")
#### ---- --------------------------------------------------------------------------
zooxlog.shapiro <- shapiro.test(res3)
print(zooxlog.shapiro)
#### ---- --------------------------------------------------------------------------
qqnorm(res3, ylab = "Sample Quantiles for Zooxlog")
qqline(res3, col = "red")
#### ---- --------------------------------------------------------------------------
leveneTest(zoox_log ~ as.factor(site), data = data)
#### ---- --------------------------------------------------------------------------
zoox1 <- lmer(zoox_log ~ site + (1|colony/frag),
data = data,
REML = FALSE,
na.action = na.exclude) #AIC 72.8756
ranova(zoox1)
#### ---- --------------------------------------------------------------------------
zoox2 <- lmer(zoox_log ~ site + (1|colony),
data = data,
REML = FALSE,
na.action = na.exclude)
ranova(zoox2)
mod.AIC <- AIC(zoox1, zoox2)
mod.AIC
#### ---- --------------------------------------------------------------------------
finalzoox <- lmer(zoox_log ~ site + (1|colony),
data = data,
REML = TRUE,
na.action = na.exclude)
#### ---- --------------------------------------------------------------------------
finalrandzoox <- ranova(finalzoox)
zoox.table <- anova(finalzoox, type = 2) #ANOVA table
zoox.table
finalrandzoox
#### ---- --------------------------------------------------------------------------
plotNormalHistogram(chla, main = "Chlorophyll a", xlab = "Chlorophyll")
#### ---- --------------------------------------------------------------------------
lmechla <- lmer(chla ~ site + (1|colony),
REML = FALSE,
na.action = na.exclude)
res4 <- resid(lmechla)
#### ---- include=FALSE, eval=FALSE------------------------------------------------
hist(res4, main = "Chla", xlab = "Residuals")
plotNormalHistogram(res4, main = "Chlorophyll a", xlab = "Residuals")
#### ---- --------------------------------------------------------------------------
plotNormalHistogram(res4, main = "Chlorophyll a", xlab = "Residuals")
#### ---- --------------------------------------------------------------------------
chla.shapiro <- shapiro.test(res4)
print(chla.shapiro)
#### ---- --------------------------------------------------------------------------
qqnorm(res4, ylab = "Sample Quantiles for Zoox")
qqline(res4, col = "red")
#### ---- --------------------------------------------------------------------------
leveneTest(chla ~ as.factor(site), data = data) # p = 0.01606
#### ---- --------------------------------------------------------------------------
data$chla_log = log(chla)
#### ---- --------------------------------------------------------------------------
head(data)
attach(data)
colnames(data)
#### ---- --------------------------------------------------------------------------
plotNormalHistogram(chla_log, main = "Chlorophyll a", xlab = "log (Chlorophyll)")
#### ---- --------------------------------------------------------------------------
lmechla_log <- lmer(chla_log ~ site + (1|colony),
REML = FALSE,
na.action = na.exclude)
res7 <- resid(lmechla_log)
#### ---- include=FALSE, eval = FALSE----------------------------------------------
hist(res7, main = "chla log", xlab = "Residuals")
plotNormalHistogram(res7, main = "Chlorophyll a", xlab = "Residuals")
#### ---- --------------------------------------------------------------------------
plotNormalHistogram(res7, main = "Chlorophyll a", xlab = "Residuals")
#### ---- --------------------------------------------------------------------------
chlalog.shapiro <- shapiro.test(res7) #runs a normality test on residuals
print(chlalog.shapiro) # null = normally distrubuted (P<0.05 = non-normal
#### ---- --------------------------------------------------------------------------
qqnorm(res7, ylab = "Sample Quantiles for chlalog")
qqline(res7, col = "red")
#### ---- --------------------------------------------------------------------------
leveneTest(chla_log ~ as.factor(site), data = data) # p = 0.7487
#### ---- --------------------------------------------------------------------------
chla_log1 <- lmer(chla_log ~ site+(1|colony/frag),
data = data,
REML = FALSE,
na.action = na.exclude) #AIC 55.11567
ranova(chla_log1)
#### ---- --------------------------------------------------------------------------
chla_log2 <- lmer(chla_log ~ site + (1|colony),
data = data,
REML = FALSE,
na.action = na.exclude)
ranova(chla_log2)
mod.AIC <- AIC(chla_log1, chla_log2)
mod.AIC
#### ---- --------------------------------------------------------------------------
finalchla_log <- lmer(chla_log ~ site + (1|colony),
data = data,
REML = TRUE,
na.action = na.exclude)
#### ---- --------------------------------------------------------------------------
finalchla_logrand <- ranova(finalchla_log)
chla_log.table <- anova(finalchla_log, type = 2) #ANOVA table
chla_log.table
finalchla_logrand
#### ---- --------------------------------------------------------------------------
plotNormalHistogram(chlc, main = "Chlorophyll c", xlab = "Chlorophyll")
#### ---- --------------------------------------------------------------------------
lmechlc <- lmer(chlc ~ site + (1|colony),
REML = FALSE,
na.action = na.exclude)
res5 <- resid(lmechlc)
#### ---- eval=FALSE, include=FALSE------------------------------------------------
hist(res5,main = "Chlorophyll c", xlab = "Residuals")
plotNormalHistogram(res5, main = "Chlorophyll c", xlab = "Residuals")
#### ---- --------------------------------------------------------------------------
plotNormalHistogram(res5, main = "Chlorophyll c", xlab = "Residuals")
#### ---- --------------------------------------------------------------------------
chlc.shapiro <- shapiro.test(res5)
print(chlc.shapiro)
#### ---- --------------------------------------------------------------------------
qqnorm(res5, ylab = "Sample Quantiles for Zoox")
qqline(res5, col = "red")
#### ---- --------------------------------------------------------------------------
leveneTest(chlc ~ as.factor(site), data = data) # p = 0.01316
#### ---- --------------------------------------------------------------------------
data$chlc_log <- log(chlc)
attach(data)
colnames(data)
#### ---- --------------------------------------------------------------------------
plotNormalHistogram(data$chlc_log, main = "Chlorophyll c (log)", xlab = "log (Chlorophyll)")
#### ---- --------------------------------------------------------------------------
lmechlc_log <- lmer(chlc_log ~ site + (1|colony),
REML = FALSE,
na.action = na.exclude)
res6 <- resid(lmechlc_log)
#### ---- include=FALSE, eval=FALSE------------------------------------------------
hist(res6, main = "Chlorophyll c log", xlab = "Residuals")
plotNormalHistogram(res6, main = "Chlorophyll c log", xlab = "Residuals")
#### ---- --------------------------------------------------------------------------
plotNormalHistogram(res6, main = "Chlorophyll c (log)", xlab = "Residuals")
#### ---- --------------------------------------------------------------------------
chlclog.shapiro <- shapiro.test(res6)
print(chlclog.shapiro)
#### ---- --------------------------------------------------------------------------
qqnorm(res6, ylab = "Sample Quantiles for chlclog")
qqline(res6, col = "red")
#### ---- --------------------------------------------------------------------------
leveneTest(chlc_log ~ as.factor(site), data = data) # p = 0.884
#### ---- --------------------------------------------------------------------------
chlc_log1 <- lmer(chlc_log ~ site + (1|colony/frag),
data = data,
REML = FALSE,
na.action = na.exclude) #AIC 64.52399
ranova(chlc_log1)
#### ---- --------------------------------------------------------------------------
chlc_log2 <- lmer(chlc_log ~ site+(1|colony),
data = data,
REML = FALSE,
na.action = na.exclude)
ranova(chlc_log2)
mod.AIC <- AIC(chlc_log1, chlc_log2)
mod.AIC
#### ---- --------------------------------------------------------------------------
finalchlc_log <- lmer(chlc_log ~ site + (1|colony),
data = data,
REML = TRUE,
na.action = na.exclude)
#### ---- --------------------------------------------------------------------------
finalchlc_logrand <- ranova(finalchlc_log)
chlc_log.table <- anova(finalchlc_log, type = 2) #ANOVA table
chlc_log.table
finalchlc_logrand
#### ---- --------------------------------------------------------------------------
all <- read.table("tables/field/coralhypoxia.csv", header = T, sep = ",")
attach(all)
#### ---- --------------------------------------------------------------------------
means.all <- ddply(all, c("site", "colony"), summarise,
N = sum(!is.na(yield)),
yield_mean = mean(yield, na.rm = TRUE),
zoox_mean = mean(zoox, na.rm = TRUE),
chla_mean = mean(chla, na.rm = TRUE),
chlc_mean = mean(chlc, na.rm = TRUE),
sd_yield = sd(yield),
sd_zoox = sd(zoox),
sd_chla = sd(chla),
sd_chlc = sd(chlc),
yield_se = (sd(yield)/sqrt(N)),
zoox_se = (sd(zoox)/sqrt(N)),
chla_se = (sd(chla)/sqrt(N)),
chlc_se = (sd(chlc)/sqrt(N)))
write.csv(means.all, file = "tables/field/allcoralmeans.csv",
row.names = FALSE)
#### ---- --------------------------------------------------------------------------
ggplot(all,aes(x = site, y = yield)) +
geom_boxplot() +
labs(x = "Site", y='Maximum Quantum Yield') +
theme_classic() +
theme(text = element_text(size=14)) +
theme(axis.text.x = element_text(colour = "black", angle = 0, hjust = 1),
axis.text.y = element_text(colour = "black")) +
scale_y_continuous(expand = c(0,0),
limits = c(0.4,0.75))
#### ---- --------------------------------------------------------------------------
ggplot(all,aes(x=site, y=yield, color=colony)) +
geom_point(size=3) +
labs(x = "Site", y = 'Maximum Quantum Yield') +
theme_classic() +
theme(text = element_text(size=14)) +
theme(axis.text.x = element_text(colour="black", angle=0, hjust=1),
axis.text.y = element_text(colour="black")) +
scale_y_continuous(expand = c(0,0),
limits = c(0.4,0.75))
#### ---- --------------------------------------------------------------------------
ggplot(means.all,aes(x = site, y = yield_mean,fill = site)) +
geom_boxplot() +
labs(x = "Site", y = 'Maximum Quantum Yield') +
theme_classic() +
scale_fill_manual(values = c("steelblue","darkred")) +
theme(text = element_text(size = 14)) +
theme(axis.text.x = element_text(colour = "black", angle = 0, hjust = 1),
axis.text.y = element_text(colour = "black")) +
scale_y_continuous(expand = c(0,0),
limits = c(0.4,0.75))
#### ---- --------------------------------------------------------------------------
jitter <- position_jitter(width = 0.1)
yield_plot <- ggplot(means.all, aes(x = site, y = yield_mean))
#### ---- --------------------------------------------------------------------------
yp <- yield_plot+
geom_boxplot(color="black",
aes(fill=site),
width=0.4) +
scale_fill_manual(values=c("skyblue3","coral3"),
name= NULL, labels=c("Normoxic","Hypoxic")) +
geom_point(size=2, position=jitter) +
labs(x="Site",y="Maximum Quantum Yield") +
theme_classic() +
theme(legend.position = c(0.85,0.85),
legend.title= element_text(color="transparent"),
axis.text.x = element_text(colour="black"),
axis.text.y = element_text(colour="black"),
axis.title.y = element_text(margin = margin(t=0, r=10,b=0,l=0)),
panel.border = element_rect(colour = "black", fill=NA, size=.5)) +
theme(text = element_text(size=12)) +
scale_x_discrete(labels=c("Coral" = "Coral Cay","Roldan" = "Cayo Roldan")) +
scale_y_continuous(expand = c(0,0), limits = c(0.5,0.7))
yp
#### ---- --------------------------------------------------------------------------
yield_zoox <- ggplot(means.all, aes(x=site, y=zoox_mean))
zp <- yield_zoox+
geom_boxplot(color="black",
aes(fill=site),
width=0.4) +
scale_fill_manual(values=c("skyblue3","coral3"),
name= NULL, labels=c("Normoxic","Hypoxic")) +
geom_point(size=2, position=jitter) +
labs(x="Site",
y=("Endosymbiont Density (cells cm^-2)")) +
theme_classic() +
theme(legend.position = "none", #
axis.text.x = element_text(colour="black"),
axis.text.y = element_text(colour="black"),
axis.title.y = element_text(margin = margin(t=0, r=10,b=0,l=0)),
panel.border = element_rect(colour = "black", fill=NA, size=.5)) +
theme(text = element_text(size=12)) +
scale_x_discrete(labels=c("Coral" = "Coral Cay","Roldan" = "Cayo Roldan")) +
scale_y_continuous(labels = scientific,
breaks=seq(0,1.5E6, 4.5E5),
expand = c(0,0),
limits = c(0,1.5E6))
zp
#### ---- --------------------------------------------------------------------------
pigs <- tidyr::gather(means.all, pigment, concentration,chla_mean, chlc_mean)
pigments <- dplyr::select(pigs,site,colony,pigment,concentration)
#### ---- --------------------------------------------------------------------------
pig_plot <- ggplot(pigments,
aes(x=pigment, y=concentration, fill=site))
#### ---- --------------------------------------------------------------------------
pig_plot+
geom_boxplot(color="black") +
scale_fill_manual(values=c("skyblue3","coral3"),
name= NULL, labels=c("Normoxic","Hypoxic"))
#### ---- --------------------------------------------------------------------------
jitter2 <- position_jitter(width=0.05)
chla_plot <- ggplot(means.all,
aes(x=site, y=chla_mean))
#### ---- --------------------------------------------------------------------------
cp <- chla_plot+
geom_boxplot(color="black",
aes(fill=site),
width=0.4) +
scale_fill_manual(values=c("skyblue3","coral3"), name= NULL, labels=c("Normoxic","Hypoxic")) +
geom_point(size=2, position=jitter) +
labs(x="Site",
y=("Chl a Concentration (ug cm^-2)")) +
theme_classic() +
theme(legend.position = "none",
axis.text.x = element_text(colour="black"),
axis.text.y = element_text(colour="black"),
axis.title.y = element_text(margin = margin(t=0, r=10,b=0,l=0)),
panel.border = element_rect(colour = "black", fill=NA, size=.5)) +
theme(text = element_text(size=12)) +
scale_x_discrete(labels=c("Coral" = "Coral Cay","Roldan" = "Cayo Roldan")) +
scale_y_continuous(expand = c(0,0),
limits = c(0,6))
cp
#### ---- echo=FALSE, fig.height=4, layout='l-body-outset', eval=FALSE-------------
bar_plots <- yp + zp + cp
bar_plots + plot_annotation(tag_levels = 'A',
title = 'Heatmaps of taxa distribution',
#subtitle = 'Top 30 taxa of non-rarefied data',
caption = 'A) DETAILS,
B) DETAILS,
C) DETAILS'
)
#### ---- echo=FALSE, eval=FALSE---------------------------------------------------
plot_grid(yp, cp,zp, labels="AUTO", ncol=3, align="v")
#### ---- --------------------------------------------------------------------------
ypp <- yield_plot +
geom_boxplot(color = "black", aes(fill = site), width = 0.4) +
scale_fill_manual(values = c("skyblue3", "coral3"), name = NULL,
labels = c("Normoxic", "Hypoxic")) +
geom_point(size = 1, position = jitter) +
labs(x = "Site", y = ("Maximum Quantum Yield")) +
theme_classic() +
theme(legend.position = "top", legend.direction = "vertical", #
legend.title= element_text(color = "transparent"),
axis.text.x = element_text(size = 7, colour="black"),
axis.text.y = element_text(size = 8, colour = "black"),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 9,
margin = margin(t = 0, r = 10,
b = 0, l = 0)),
panel.border = element_rect(colour = "black",
fill = NA, size = 0.5)) +
theme(text = element_text(size = 12)) +
scale_x_discrete(labels=c("Coral" = "Coral Cay", "Roldan" = "Cayo Roldan")) +
scale_y_continuous(expand = c(0,0), limits = c(0.5,0.7))
#### ---- --------------------------------------------------------------------------
yzield_zoox <- ggplot(means.all, aes(x = site, y = zoox_mean))
zpp <- yield_zoox+
geom_boxplot(color="black",
aes(fill=site),
width=0.4) +
scale_fill_manual(values=c("skyblue3","coral3"), name= NULL,
labels=c("Normoxic","Hypoxic")) +
geom_point(size=1, position=jitter) +
labs(x = "Site", y=("Endosymbiont Density (cells cm^-2)")) +
theme_classic() +
theme(legend.position = "none",
axis.text.x = element_text(size = 7, colour="black"),
axis.title.x = element_text(size = 12),
axis.text.y = element_text(size = 8, colour="black"),
axis.title.y = element_text(size = 9, margin = margin(t=0, r=10,b=0,l=0)),
panel.border = element_rect(colour = "black", fill=NA, size=.5)) +
theme(text = element_text(size=12)) +
scale_x_discrete(labels=c("Coral" = "Coral Cay","Roldan" = "Cayo Roldan")) +
scale_y_continuous(labels = scientific,
breaks=seq(0,1.5E6, 4.5E5),
expand = c(0,0),
limits = c(0,1.5E6))
#### ---- --------------------------------------------------------------------------
chla_plot <- ggplot(means.all, aes(x = site, y = chla_mean))
cpp <- chla_plot +
geom_boxplot(color="black", aes(fill=site), width=0.4) +
scale_fill_manual(values = c("skyblue3","coral3"),
name = NULL, labels = c("Normoxic", "Hypoxic")) +
geom_point(size=1, position=jitter) +
labs(x = "Site", y = ("Chl a Concentration (ug cm^-2)")) +
theme_classic() +
theme(legend.position = "none",
axis.text.x = element_text(size = 7, colour="black"),
axis.text.y = element_text(size = 8, colour="black"),
axis.title.y = element_text(size = 9,
margin = margin(t = 0, r = 10,
b = 0, l = 0)),
axis.title.x = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=.5)) +
theme(text = element_text(size=12)) +
scale_x_discrete(labels=c("Coral" = "Coral Cay","Roldan" = "Cayo Roldan")) +
scale_y_continuous(expand = c(0,0),
limits = c(0,6))
#### ---- echo=FALSE, fig.height=5, layout='l-body-outset'-------------------------
bar_plots_ft <- ypp + zpp + cpp
bar_plots_ft + plot_annotation(tag_levels = 'A',
title = 'Coral response variables ',
#subtitle = 'Top 30 taxa of non-rarefied data',
caption = 'A) Pulse Amplitude Modulated (PAM),
B) Zooxanthellae,
C) Chlorophyll a (Chla)'
)
#### ---- eval=FALSE, echo=FALSE---------------------------------------------------
panel <- plot_grid(ypp, zpp,cpp, labels="AUTO", ncol=1, align="v")
panel
##########################################################
## 16s rRNA Workflow
##########################################################
#############################
##
## No 1. DADA2 Workflow
##
#############################
#### ---- setup, include=FALSE------------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
set.seed(911)
library(dada2); packageVersion("dada2")
library(ShortRead); packageVersion("ShortRead")
library(ggplot2); packageVersion("ggplot2")
library(rmarkdown)
library(DT)
library(gridExtra)
library(grid)
knitr::opts_current$get(c(
"cache",
"cache.path",
"cache.rebuild",
"dependson",
"autodep"
))
#### ---- sample_seq_table, echo=FALSE, layout="l-body-outset", eval=FALSE----------
seq_table <- read.table("tables/16s-dada2/sample-seq-info-2019-16s.txt",
header = TRUE, sep = "\t")
datatable(seq_table, width = "100%",
extensions = 'Buttons', options = list(
scrollX = TRUE,
dom = 'lfrtipB',
buttons = c('copy', 'csv', 'excel'),
pageLength = 8
)
)
cutadapt -g {F-ADAPTER} -G {R-ADAPTER} /
-o ${R1}.trimmed.fastq /
-p {R2}.trimmed.fastq /
${R1} ${R2} /
--discard-untrimmed -e 0.12
#### ---- list_fastq_filesX, eval=FALSE---------------------------------------------
path <- "INPUT_FILES/"
head(list.files(path))
#### ---- dummy_1X, echo=FALSE------------------------------------------------------
c("WCC0_R1.trimmed.fastq", "WCC0_R2.trimmed.fastq", "WCC1_R1.trimmed.fastq",
"WCC1_R2.trimmed.fastq", "WCC2_R1.trimmed.fastq", "WCC2_R2.trimmed.fastq")
#### ---- sort_files, eval=FALSE----------------------------------------------------
fnFs <- sort(list.files(path, pattern = "_R1.trimmed.fastq"))
fnRs <- sort(list.files(path, pattern = "_R2.trimmed.fastq"))
#### ---- split_nameX, eval=FALSE---------------------------------------------------
sample.names <- sapply(strsplit(fnFs, "_"), `[`, 1)
fnFs <-file.path(path, fnFs)
fnRs <-file.path(path, fnRs)
#### ---- plot_qscoresX, layout="l-body-outset", warning=FALSE, fig.height=3, eval=FALSE----
p1 <- plotQualityProfile(fnFs[1:8], aggregate = TRUE)
p2 <- plotQualityProfile(fnRs[1:8], aggregate = TRUE)
p3 <- grid.arrange(p1, p2, nrow = 1)
ggsave("plot_qscores_compare.png", p3, width = 7, height = 3)
#### ---- dummy_2X, echo=FALSE, layout="l-body-outset", warning=FALSE, fig.height=2----
knitr::include_graphics("figures/16s-dada2/plot_qscores_compare.png")
#### ---- move_files, eval=FALSE----------------------------------------------------
filt_path <- file.path(path, "filtered")
filtFs <- file.path(filt_path, paste0(sample.names, "_F_filt.fastq.gz"))
filtRs <- file.path(filt_path, paste0(sample.names, "_R_filt.fastq.gz"))
#### ---- filterX, eval=FALSE-------------------------------------------------------
out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs,
truncLen=c(260,150), maxN=0,
maxEE=c(3,3), truncQ=2,
rm.phix=TRUE, compress=TRUE,
multithread = 20)
#### ---- filter_table, echo=FALSE, eval=FALSE--------------------------------------
datatable(out, width = "80%",
extensions = 'Buttons', options = list(
scrollX = TRUE,
dom = 'Brti',
buttons = c('copy', 'csv', 'excel'),
pageLength = 8
)
)
#### ---- dummy_4X, echo=FALSE------------------------------------------------------
out_T <- read.table("tables/16s-dada2/out_T.txt",
header = TRUE)
datatable(out_T, width = "80%",
extensions = 'Buttons', options = list(
scrollX = TRUE,
dom = 'Brti',
buttons = c('copy', 'csv', 'excel'),
pageLength = 8
)
)
#### ---- learn_errors_forwardX, eval=FALSE-----------------------------------------
errF <- learnErrors(filtFs, multithread = 20)
#### ---- learn_errors_reverseX, eval=FALSE-----------------------------------------
errR <- learnErrors(filtRs, multithread = 20)
#### ---- plot_errorFX, warning=FALSE, layout="l-body-outset", eval=FALSE-----------
plotErrors(errF, nominalQ = TRUE)
#### ---- echo=FALSE, eval=FALSE---------------------------------------------------
p4 <- plotErrors(errF, nominalQ = TRUE)
ggsave("plot_errorF_1.png", p4, width = 7, height = 5)
ggsave("plot_errorF_2.png", p4)
#### ---- dummy_6X, echo=FALSE, layout="l-body-outset"------------------------------
knitr::include_graphics("figures/16s-dada2/plot_errorF_1.png")
#### ---- plot_errorRX, warning=FALSE, layout="l-body-outset", eval=FALSE-----------
plotErrors(errR, nominalQ=TRUE)
#### ---- echo=FALSE, eval=FALSE---------------------------------------------------
p5 <- plotErrors(errR, nominalQ = TRUE)
ggsave("plot_errorR_1.png", p5, width = 7, height = 5)
ggsave("plot_errorR_2.png", p5)
#### ---- dummy_7RX, echo=FALSE, layout="l-body-outset"-----------------------------
knitr::include_graphics("figures/16s-dada2/plot_errorR_1.png")
#### ---- echo=FALSE---------------------------------------------------------------
TO run and cache the derepFastq you must set
`cache.lazy=FALSE`
For both code blocks
#### ---- dereplicate_reads_F, eval=FALSE-------------------------------------------
derepFs <- derepFastq(filtFs)
names(derepFs) <- sample.names
#### ---- dereplicate_reads_R, eval=FALSE-------------------------------------------
derepRs <- derepFastq(filtRs)
names(derepRs) <- sample.names
#### ---- run_dada2_forwardX1, eval=FALSE-------------------------------------------
dadaFs <- dada(derepFs, err = errF, multithread = 20)
#### ---- run_dada2_reverseX1, eval=FALSE-------------------------------------------
dadaRs <- dada(derepRs, err = errR, multithread = TRUE)
#### ---- inspect_f, eval=FALSE-----------------------------------------------------
dadaFs[[1]]
#### ---- inspect_r, eval=FALSE-----------------------------------------------------
dadaRs[[1]]
#### ---- merge_paired_reads, eval=FALSE--------------------------------------------
mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs)
#### ---- head_file, eval = FALSE, echo = FALSE-------------------------------------
head(mergers[[1]])
#### ---- seq_tableX, eval=FALSE----------------------------------------------------
seqtab <- makeSequenceTable(mergers)
dim(seqtab)
#### ---- echo=FALSE----------------------------------------------------------------
c(8, 4132)
#### ---- seq_table2X, eval=FALSE---------------------------------------------------
table(nchar(getSequences(seqtab)))
#### ---- trim_lengthX, eval=FALSE--------------------------------------------------
seqtab.2 <- seqtab[,nchar(colnames(seqtab)) %in% seq(360,380)]
dim(seqtab.2)
#### ---- dummy_14X, echo=FALSE-----------------------------------------------------
c(8, 4119)
#### ---- trim_length2X, eval=FALSE------------------------------------------------
table(nchar(getSequences(seqtab.2)))
#### ---- chimera_on_ind_runs, eval=FALSE-------------------------------------------
seqtab.2.nochim <- removeBimeraDenovo(seqtab.2,
method="consensus",
multithread = 20)
dim(seqtab.2.nochim)
#### ---- dummy_16, echo=FALSE------------------------------------------------------
c(8, 1424)
#### ---- chimera_on_ind_runs2, eval=FALSE------------------------------------------
sum(seqtab.2.nochim)/sum(seqtab.2)
#### ---- dummy_17, echo=FALSE------------------------------------------------------
0.874367
#### ---- build_table_to_track_reads, eval=FALSE------------------------------------
getN <- function(x) sum(getUniques(x))
track <- cbind(out,
sapply(dadaFs, getN),
sapply(dadaRs, getN),
sapply(mergers, getN),
rowSums(seqtab.2.nochim))
colnames(track) <- c("input", "filtered", "denoisedF",
"denoisedR", "merged", "nonchim")
rownames(track) <- sample.names
#### ---- dummy_44X, echo=FALSE, layout="l-body-outset"-----------------------------
sum_run <- read.table("tables/16s-dada2/read_changes.txt",
header = TRUE)
datatable(sum_run, width = "80%",
extensions = 'Buttons', options = list(
scrollX = TRUE,
dom = 'Brti',
buttons = c('copy', 'csv', 'excel'),
pageLength = 8
)
)
#### ---- track_changes, eval=FALSE, echo=FALSE-------------------------------------
write.table(track, "tables/16s-dada2/read_changes.txt",
sep = "\t", quote = FALSE, col.names=NA)
#### ---- saveRDS, eval=FALSE-------------------------------------------------------
saveRDS(seqtab.2.nochim, "seqtab.2.nochim.rds")
#### ---- read_combo, eval=FALSE----------------------------------------------------
remove(list = ls())
seqtab <- readRDS("seqtab.2.nochim.rds")
#### ---- assign_tax_silva, eval = FALSE--------------------------------------------
tax_silva <- assignTaxonomy(
seqtab, "../taxonomy_databases/silva_nr_v138_train_set.fa.gz",
multithread = TRUE)
#### ---- save_image, eval=FALSE----------------------------------------------------
save.image("rdata/16s-dada2/water_pipeline.rdata")
#############################
##
## No 2. Data Preparation
##
#############################
#### ---- setup, include=FALSE------------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
set.seed(0199)
library(phyloseq); packageVersion("phyloseq")
library(DT)
library(ggplot2)
library(Biostrings); packageVersion("Biostrings")
library(dplyr)
library(microbiome)
library(tidyverse)
options(scipen=999)
knitr::opts_current$get(c(
"cache",
"cache.path",
"cache.rebuild",
"dependson",
"autodep"
))
#### ---- echo=FALSE---------------------------------------------------------------
### NOTE: this should only be run AFTER the workflow is finished.
### This image is created at the END of this workflow and now read back in.
### Basically, this locks everything in place so nothing changes. Only code
### chunkc to display figures and tables are evaluated. Everything else is
### turned off. To actually run this you need to set this chunk to
### eval=FALSE and the rest to eval=TRUE
load("rdata/16s-data-prep/16s-data_prep.rdata")
#### ---- sample_table1, echo=FALSE, eval=FALSE-------------------------------------
joined_tab <- read.table(
"tables/16s-data-prep/read_changes.txt",
header = TRUE, sep = "\t")
joined_tab_percent <- joined_tab
percent_change <- 1-(joined_tab$nonchim/joined_tab$input) %>%
round(digits = 3)
joined_tab_percent$Change <- as.numeric(sprintf("%.3f", percent_change))
colnames(joined_tab_percent) <- c("Sample
ID", "Type", "Site",
"input", "filter", "denoiseF", "denoiseR",
"merged", "nochim", "Change")
#### ---- samp-summary1, echo=FALSE, layout="l-page"--------------------------------
NOTE. For some reason this datatable was being given the same
elementId as the first table. So I had to add a 20 character
elementId https://www.random.org/strings/
datatable(joined_tab_percent, width = "100%", escape = FALSE,
rownames = FALSE, filter = 'top',
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: left;',
'Table: ', htmltools::em('Tracking read changes
through DADA2 workflow. Use the buttons to navigate through
the table or download a copy. Table may scroll right. ')),
elementId = "vwltiu9qj77sfy3a07we",
extensions = 'Buttons', options = list(
scrollX = TRUE,
dom = 'Brti',
buttons = c('copy', 'csv', 'excel'),
pageLength = 8
)
) %>%
DT::formatStyle(columns = colnames(joined_tab_percent), fontSize = '80%')
cutoff <- 2000
remove_sam <- joined_tab[joined_tab$nonchim <= cutoff, ]
#### ---- deliniate_sample_types, eval=FALSE----------------------------------------
load("rdata/16s-dada2/water_pipeline.rdata")
samples.out <- rownames(seqtab)
subject <- sapply(strsplit(samples.out, "[[:digit:]]"), `[`, 1)
# this splits the string at first instance of a digit
sample_name <- substr(samples.out, 1, 999) # use the whole string for individuals
type <- substr(samples.out, 0, 1) # use the first two letters for sample typle
site <- substr(samples.out, 2, 3) # use the next three letters for site
num_samp <- length(unique(sample_name))
num_type <- length(unique(type))
num_sites <- length(unique(site))
#### ---- define_variables----------------------------------------------------------
#define a sample data frame
samdf <- data.frame(SamName = sample_name, TYPE = type, SITE = site)
rownames(samdf) <- samples.out
#### ---- create_ps_object, eval=FALSE----------------------------------------------
# this create the phyloseq object
ps <- phyloseq(otu_table(seqtab, taxa_are_rows = FALSE),
sample_data(samdf), tax_table(tax_silva))
tax_table(ps) <- cbind(tax_table(ps),
rownames(tax_table(ps)))
# adding unique ASV names
taxa_names(ps) <- paste0("ASV", seq(ntaxa(ps)))
tax_table(ps) <- cbind(tax_table(ps),
rownames(tax_table(ps)))
head(taxa_names(ps))
#### ---- echo=FALSE---------------------------------------------------------------
head(taxa_names(ps))
#### ---- calculate_stats_ps, echo=FALSE, eval=FALSE--------------------------------
min_read_ps <- min(readcount(ps))
max_read_ps <- max(readcount(ps))
total_reads_ps <- sum(readcount(ps))
mean_reads_ps <- round(mean(readcount(ps)), digits = 0)
median_reads_ps <- median(readcount(ps))
total_asvs_ps <- ntaxa(ps)
singleton_ps <- tryCatch(ntaxa(rare(ps, detection = 1, prevalence = 0)),
error=function(err) NA)
singleton_ps_perc <- tryCatch(round((100*(ntaxa(rare(ps, detection = 1, prevalence = 0)) /
ntaxa(ps))), digits = 3), error=function(err) NA)
sparsity_ps <- round(length(which(abundances(ps) == 0))/length(abundances(ps)),
digits = 3)
#### ---- add_ASV_coulmn, eval=FALSE------------------------------------------------
colnames(tax_table(ps)) <- c("Kingdom", "Phylum", "Class", "Order",
"Family", "Genus", "ASV_SEQ", "ASV_ID")
ps
#### ---- echo=FALSE---------------------------------------------------------------
ps
#### ---- export_seq_tax_tables, eval=FALSE-----------------------------------------
write.table(tax_table(ps),
"tables/16s-data-prep/full_tax_table.txt",
sep="\t", quote = FALSE, col.names=NA)
write.table(t(otu_table(ps)),
"tables/16s-data-prep/full_seq_table.txt",
sep="\t", quote = FALSE, col.names=NA)
#### ---- export_fasta_full, eval=FALSE---------------------------------------------
# Create fasta file from tax_table
table2format <- tax_table(ps)
#retain only the column with the sequences
table2format_trim <- table2format[, 7]
table2format_trim_df <- data.frame(row.names(table2format_trim),
table2format_trim)
colnames(table2format_trim_df) <- c("ASV_ID", "ASV_SEQ")
#format fasta
table2format_trim_df$ASV_ID <- sub("ASV", ">ASV", table2format_trim_df$ASV_ID)
write.table(table2format_trim_df, "tables/16s-data-prep/full_asv.fasta",
sep = "\r", col.names = FALSE, row.names = FALSE,
quote = FALSE, fileEncoding = "UTF-8")
#### ---- remove_specific_taxa, eval=FALSE------------------------------------------
# generate a file with mitochondria ASVs
MT1 <- subset_taxa(ps, Family == "Mitochondria")
MT1 <- as(tax_table(MT1), "matrix")
MT1 <- MT1[, 8]
MT1df <- as.factor(MT1)
goodTaxa <- setdiff(taxa_names(ps), MT1df)
ps_no_mito <- prune_taxa(goodTaxa, ps)
ps_no_mito
#### ---- echo=FALSE---------------------------------------------------------------
ps_no_mito
#### ---- remove_specific_taxa2, eval=FALSE-----------------------------------------
# generate a file with mitochondria ASVs
CH1 <- subset_taxa(ps_no_mito, Order == "Chloroplast")
CH1 <- as(tax_table(CH1), "matrix")
CH1 <- CH1[, 8]
CH1df <- as.factor(CH1)
goodTaxa <- setdiff(taxa_names(ps_no_mito), CH1df)
ps_no_chloro <- prune_taxa(goodTaxa, ps_no_mito)
ps_no_chloro
#### ---- echo=FALSE---------------------------------------------------------------
ps_no_chloro
#### ---- remove_specific_taxa3, eval=FALSE, eval=FALSE-----------------------------
# generate a file with mitochondria ASVs
EU1 <- subset_taxa(ps_no_chloro, Kingdom == "Eukaryota")
EU1 <- as(tax_table(EU1), "matrix")
EU1 <- EU1[, 8]
EU1df <- as.factor(EU1)
goodTaxa <- setdiff(taxa_names(ps_no_chloro), EU1df)
ps_no_euk <- prune_taxa(goodTaxa, ps_no_chloro)
ps_no_euk
#### ---- remove_kingdom_na, eval=FALSE---------------------------------------------
ps_filt <- subset_taxa(ps_no_chloro, !is.na(Kingdom))
ps_filt
#### ---- echo=FALSE---------------------------------------------------------------
ps_filt
#### ---- save_trim_table, eval=FALSE-----------------------------------------------
write.table(tax_table(ps_filt),
"tables/16s-data-prep/trim_tax_table.txt", sep="\t",
quote = FALSE, col.names=NA)
write.table(t(otu_table(ps_filt)),
"tables/16s-data-prep/trim_seq_table.txt", sep="\t",
quote = FALSE, col.names=NA)
write.table(sample_data(ps_filt),
"tables/16s-data-prep/sample_table.txt",
sep="\t", quote = FALSE, col.names=NA)
#### ---- export_fasta_water, eval=FALSE--------------------------------------------
# Create fasta file from tax_table
table2format <- tax_table(ps_filt)
#retain only the column with the sequences
table2format_trim <- table2format[, 7]
table2format_trim_df <- data.frame(row.names(table2format_trim),
table2format_trim)
colnames(table2format_trim_df) <- c("ASV_ID", "ASV_SEQ")
#format fasta
table2format_trim_df$ASV_ID <- sub("ASV", ">ASV", table2format_trim_df$ASV_ID)
write.table(table2format_trim_df, "tables/16s-data-prep/trim_asv.fasta",
sep = "\r", col.names = FALSE, row.names = FALSE,
quote = FALSE, fileEncoding = "UTF-8")
#### ---- select_water_samples, echo=FALSE, eval=FALSE------------------------------
###############################################
## This is legacy code for making a ps object##
## for a subset of samples ##
###############################################
## Water Samples Phyloseq Object
#The next thing to do is select only water samples and make a separate phyloseq object---since our paper is about the water samples. To do this we must select the samples we *wish to keep*. If you want to change the group of samples, modify the script accordingly.
ps_water <- prune_samples(
c("WCC0", "WCC1", "WCC2", "WCC3", "WCR0", "WCR1", "WCR2", "WCR3"),
ps_filt)
ps_water
#Anytime we remove samples we probably lose some ASVs. So we need to get rid of any ASVs that have a total of **0 reads**.
ps_water <- prune_taxa(taxa_sums(ps_water) > 0, ps_water)
ps_water
#So for water samples only there are `r ntaxa(ps_water)` ASVs and `r nsamples(ps_water)` samples. Now, save the various phyloseq data from the water object.
#### ---- save_data_prep_image, echo=TRUE, eval=FALSE-------------------------------
ps_water <- ps_filt
save.image("rdata/16s-data-prep/16s-data_prep.rdata")
saveRDS(otu_table(ps_water), "rdata/16s-data-prep/ps_water-seqtab.rds")
saveRDS(sample_data(ps_water), "rdata/16s-data-prep/ps_water-sample.rds")
saveRDS(tax_table(ps_water), "rdata/16s-data-prep/ps_water-taxtab.rds")
#############################
##
## No 3. Diversity
##
#############################
#### ---- setup, include=FALSE------------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
set.seed(0199)
library(phyloseq); packageVersion("phyloseq")
library(DT)
library(ggplot2)
library(Biostrings); packageVersion("Biostrings")
library(dplyr)
library(microbiome)
library(tidyverse)
library(data.table)
library(plyr)
library(stringr)
require(gdata)
library(labdsv)
library(reshape)
options(scipen=999)
knitr::opts_current$get(c(
"cache",
"cache.path",
"cache.rebuild",
"dependson",
"autodep"
))
#### ---- load_water_2, echo=FALSE--------------------------------------------------
### NOTE: this should only be run AFTER the workflow is finished.
### This image is created at the END of this workflow and now read back in.
### Basically, this locks everything in place so nothing changes. Only code
### chunk to display figures and tables are evaluated. Everything else is
### turned off. To actually run this you need to set this chunk to
### eval=FALSE and the rest to eval=TRUE
load("rdata/16s-water/16s-water.rdata")
#### ---- load_water_1, eval=FALSE--------------------------------------------------
remove(list = ls())
sample_d <- readRDS("rdata/16s-data-prep/ps_water-sample.rds")
seqtab <- readRDS("rdata/16s-data-prep/ps_water-seqtab.rds")
taxtab <- readRDS("rdata/16s-data-prep/ps_water-taxtab.rds")
ps_water <- merge_phyloseq(sample_d, seqtab, taxtab)
ps_water_o <- ps_water
ps_water
#### ---- echo=FALSE---------------------------------------------------------------
ps_water
#### ---- save_water_table, eval=FALSE----------------------------------------------
write.table(tax_table(ps_water),
"tables/16s-water/water_tax_table.txt", sep="\t",
quote = FALSE, col.names=NA)
write.table(t(otu_table(ps_water)),
"tables/16s-water/water_seq_table.txt", sep="\t",
quote = FALSE, col.names=NA)
#### ---- export_fasta_water, eval=FALSE--------------------------------------------
# Create fasta file from tax_table
table2format <- tax_table(ps_water)
#retain only the column with the sequences
table2format_trim <- table2format[, 7]
table2format_trim_df <- data.frame(row.names(table2format_trim),
table2format_trim)
colnames(table2format_trim_df) <- c("ASV_ID", "ASV_SEQ")
#format fasta
table2format_trim_df$ASV_ID <- sub("ASV", ">ASV", table2format_trim_df$ASV_ID)
write.table(table2format_trim_df, "tables/16s-water/water_asv.fasta",
sep = "\r", col.names = FALSE, row.names = FALSE,
quote = FALSE, fileEncoding = "UTF-8")
#### ---- add_var_cats, eval=FALSE--------------------------------------------------
sample_data(ps_water)$oxstate <-
c("normoxic", "normoxic", "normoxic", "normoxic",
"normoxic", "hypoxic", "hypoxic", "hypoxic")
sample_data(ps_water)$period <-
c("cc_after", "cc_during", "cc_during", "cc_during",
"cr_after", "cr_during", "cr_during", "cr_during")
#### ---- calculate_stats_ps_water, echo=FALSE, eval=FALSE--------------------------
### Reads
min_read_ps_water <- min(readcount(ps_water))
max_read_ps_water <- max(readcount(ps_water))
total_reads_ps_water <- sum(readcount(ps_water))
mean_reads_ps_water <- round(mean(readcount(ps_water)), digits = 0)
median_reads_ps_water <- median(readcount(ps_water))
### ASVs
total_asvs_ps_water <- ntaxa(ps_water)
min_asvs_ps_water <- min(estimate_richness(
ps_water, measures = "Observed"))
max_asvs_ps_water <- max(estimate_richness(
ps_water, measures = "Observed"))
mean_asvs_ps_water <- formatC(mean(
estimate_richness(ps_water,
measures = "Observed")$Observed),
digits = 0, format = 'f')
median_asvs_ps_water <- median(
estimate_richness(ps_water,
measures = "Observed")$Observed)
singleton_ps_water <- tryCatch(ntaxa(rare(ps_water,
detection = 1,
prevalence = 0)),
error=function(err) NA)
singleton_ps_water_perc <- tryCatch(round((100*(ntaxa(rare(ps_water,
detection = 1,
prevalence = 0)) /
ntaxa(ps_water))), digits = 3),
error=function(err) NA)
sparsity_ps_water <- round(length(which(abundances(ps_water) == 0))/length(abundances(ps_water)),
digits = 3)
#### ---- sample_summary_table, eval=FALSE------------------------------------------
total_reads <- sample_sums(ps_water)
total_reads <- as.data.frame(total_reads, make.names = TRUE)
total_reads <- total_reads %>% rownames_to_column("Sample_ID")
total_asvs <- estimate_richness(ps_water,
measures = c(
"Observed", "Shannon", "InvSimpson"))
total_asvs <- total_asvs %>% rownames_to_column("Sample_ID")
total_asvs$Sample_ID <- gsub('\\.', '-', total_asvs$Sample_ID)
#### ---- sample_summary_table2, eval=FALSE-----------------------------------------
sam_details <- meta(sample_data(ps_water))
rownames(sam_details) <- NULL
colnames(sam_details) <- c("Sample_ID", "Type", "Site", "Oxstate", "Period")
merge_tab <- merge(sam_details, total_reads, by = "Sample_ID")
merge_tab2 <- merge(merge_tab, total_asvs, by = "Sample_ID")
colnames(merge_tab2) <- c("Sample
ID", "Type", "Site", "Oxstate", "Period",
"total
reads", "total
ASVs", "Shannon", "InvSimpson")
#### ---- sample_summary_table_display, layout="l-body-outset", echo=FALSE----------
datatable(merge_tab2, width = "100%", escape = FALSE,
rownames = FALSE,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: left;',
'Table: ', htmltools::em('Total reads & ASVs by sample.')),
elementId = "mex738zeiysgiy3fysb8",
extensions = 'Buttons', options = list(
scrollX = TRUE,
dom = 'Brti',
buttons = c('copy', 'csv', 'excel'),
pageLength = 8,
lengthMenu = c(5, 10)
)
) %>%
formatRound(columns=c("Shannon", "InvSimpson"), digits=2) %>%
formatStyle(columns = colnames(merge_tab2), fontSize = '80%')
#### ---- tax_clean, echo=FALSE, eval=FALSE-----------------------------------------
tax.clean <- data.frame(tax_table(ps_water))
for (i in 1:6){ tax.clean[,i] <- as.character(tax.clean[,i])}
tax.clean[is.na(tax.clean)] <- ""
for (i in 1:nrow(tax.clean)){
if (tax.clean[i,2] == ""){
kingdom <- paste("Kingdom_", tax.clean[i,1], sep = "")
tax.clean[i, 2:6] <- kingdom
} else if (tax.clean[i,3] == ""){
phylum <- paste("Phylum_", tax.clean[i,2], sep = "")
tax.clean[i, 3:6] <- phylum
} else if (tax.clean[i,4] == ""){
class <- paste("Class_", tax.clean[i,3], sep = "")
tax.clean[i, 4:6] <- class
} else if (tax.clean[i,5] == ""){
order <- paste("Order_", tax.clean[i,4], sep = "")
tax.clean[i, 5:6] <- order
} else if (tax.clean[i,6] == ""){
tax.clean$Genus[i] <- paste("Family",tax.clean$Family[i], sep = "_")
}
}
tax_table(ps_water) <- as.matrix(tax.clean)
rank_names(ps_water)
rm(class, order, phylum, kingdom)
#### ---- echo=FALSE---------------------------------------------------------------
rank_names(ps_water)
#### ---- echo=FALSE, eval=FALSE---------------------------------------------------
write.table(tax_table(ps_water),
"tables/16s-water/water_tax_table-no-na-names.txt", sep="\t",
quote = FALSE, col.names=NA)
write.table(t(otu_table(ps_water)),
"tables/16s-water/water_seq_table-no-na-names.txt", sep="\t",
quote = FALSE, col.names=NA)
#### ---- assign_rank, eval=FALSE---------------------------------------------------
TRANK <- "Class"
#### ---- diversity_table_w1, eval=FALSE--------------------------------------------
tax_asv <- table(tax_table(ps_water)[, TRANK],
exclude = NULL, dnn = "Taxa")
tax_asv <- as.data.frame(tax_asv, make.names = TRUE, stringsAsFactors=FALSE)
tax_reads <- factor(tax_table(ps_water)[, TRANK])
tax_reads <- apply(otu_table(ps_water), MARGIN = 1, function(x)
{
tapply(x, INDEX = tax_reads, FUN = sum, na.rm = TRUE, simplify = TRUE)
})
tax_reads <- as.data.frame(tax_reads, make.names = TRUE)
tax_reads <- cbind(tax_reads, reads = rowSums(tax_reads))
tax_reads <- tax_reads[9]
tax_reads <- setDT(tax_reads, keep.rownames = TRUE)[]
#### ---- diversity_table_w2, eval=FALSE--------------------------------------------
taxa_read_asv_tab <- merge(tax_reads, tax_asv, by.x = "rn", by.y = "Taxa")
top_reads <- top_n(taxa_read_asv_tab, n = 8, wt = reads)
top_asvs <- top_n(taxa_read_asv_tab, n = 8, wt = Freq)
names(taxa_read_asv_tab) <- c("Taxa", "total reads", "total ASVs")
#### ---- diversity_table_w3, echo=FALSE--------------------------------------------
datatable(
taxa_read_asv_tab, rownames = FALSE, width = "100%",
colnames = c("Taxa", "total reads", "total ASVs"),
caption = htmltools::tags$caption(
style = "caption-side: bottom; text-align: left;",
"Total reads & ASVs by taxa" ),
elementId = "9sgooxcmkzw565dtk7or",
extensions = "Buttons",
options = list(
columnDefs = list(list(className = "dt-left", targets = 0)),
dom = "Blfrtip",
buttons = c("csv", "copy"),
scrollX = TRUE, scrollCollapse = TRUE, scrollY=TRUE,
pageLength = 5,
scroller=TRUE, lengthMenu = c(5, 10, 25, 60)))
top_reads2 <- tibble::column_to_rownames(top_reads, var = "rn")
top_reads2 <- top_reads2[with(top_reads2, order(-reads)),]
top_asvs2 <- tibble::column_to_rownames(top_asvs, var = "rn")
top_asvs2 <- top_asvs2[with(top_asvs2, order(-Freq)),]
#### ---- assign_rank2, eval=FALSE--------------------------------------------------
TRANK <- "Class"
#### ---- calc_avg2, warning=FALSE, eval=FALSE--------------------------------------
ps_water_AVG <- transform_sample_counts(ps_water, function(x) x/sum(x))
ps_water_ox <- merge_samples(ps_water, "oxstate")
SD_BAR_w <- merge_samples(sample_data(ps_water_AVG), "oxstate")
#### ---- calc_avg3, warning=FALSE, eval=FALSE--------------------------------------
mdata_phy_w <- tax_glom(ps_water_ox, taxrank = TRANK, NArm = FALSE)
mdata_phyrel_w <- transform_sample_counts(mdata_phy_w, function(x) x/sum(x))
meltd_w <- psmelt(mdata_phyrel_w)
meltd_w[[TRANK]] <- as.character(meltd_w[[TRANK]])
#### ---- calc_avg4, warning=FALSE, eval=FALSE--------------------------------------
means_w <- ddply(meltd_w, ~get(TRANK), function(x) c(mean = mean(x$Abundance)))
colnames(means_w) <- c(TRANK, "mean")
means_w$mean <- round(means_w$mean, digits = 5)
taxa_means_w <- means_w[order(-means_w$mean), ] # this orders in decending fashion
taxa_means_w <- format(taxa_means_w, scientific = FALSE) # ditch the sci notation
#### ---- calc_avg5, warning=FALSE, echo=FALSE--------------------------------------
datatable(
taxa_means_w, rownames = FALSE, width = "100%",
colnames = c(TRANK, "mean"),
caption = htmltools::tags$caption(
style = "caption-side: bottom; text-align: left;",
"Relative abundance by taxonomic rank. "),
elementId = "t9xg4oo3fbjqjqu1ogjm",
extensions = "Buttons",
options = list(columnDefs = list(list(
className = "dt-left", targets = "_all")),
dom = "Blfrtip", buttons = c("csv", "copy"),
scrollX = TRUE, scrollCollapse = TRUE, scrollY=TRUE,
pageLength = 10,
scroller=TRUE, lengthMenu = c(10, 25, 60)))
#### ---- calc_avg6, warning=FALSE, eval=FALSE--------------------------------------
TAXAN <- 8
top_perc_w <- top_n(taxa_means_w, n = TAXAN, wt = mean)
top_perc_w$mean <- round(as.numeric(top_perc_w$mean), digits = 5)
min_top_perc_w <- round(as.numeric(min(top_perc_w$mean)), digits = 5)
top_perc_w_list <- top_perc_w[,1]
#### ---- make_other_n_v_h, eval=FALSE----------------------------------------------
Other_w <- means_w[means_w$mean < min_top_perc_w, ][[TRANK]]
meltd_w[meltd_w[[TRANK]] %in% Other_w, ][[TRANK]] <- "Other"
samp_names_w <- aggregate(meltd_w$Abundance, by = list(meltd_w$Sample), FUN = sum)[, 1]
.e_w <- environment()
meltd_w[, TRANK] <- factor(meltd_w[, TRANK], sort(unique(meltd_w[, TRANK])))
meltd_w1 <- meltd_w[order(meltd_w[, TRANK]), ]
target <- c("Cyanobacteriia", "Alphaproteobacteria",
"Bacteroidia", "Gammaproteobacteria",
"Thermoplasmata", "Phylum_Marinimicrobia_(SAR406_clade)",
"Acidimicrobiia", "Campylobacteria", "Other")
meltd_w1[[TRANK]] <- reorder.factor(meltd_w1[[TRANK]], new.order = target)
#### ---- make_fig_n_v_h, echo=FALSE, eval=FALSE------------------------------------
family_pal <- c("#009E73", #1
"#56B4E9", #2
"#CC79A7", #3
"#0072B2", #4
"#D55E00", #5
"#B6DBFF", #6
"#E69F00", #7
"#F0E442", #8
"#323232", #9
"#7F7F7F")
fig_w <- ggplot(meltd_w1,
aes_string(x = "Sample", y = "Abundance",
fill = TRANK, show.legend = FALSE),
ordered = TRUE, xlab = "x-axis label", ylab = "y-axis label")
fig_w <- fig_w + geom_bar(stat = "identity",
position = position_stack(reverse = TRUE),
width = 0.95) + coord_flip() +
theme(aspect.ratio = 1/4) + theme(legend.position = "bottom")
fig_w <- fig_w + scale_fill_manual(values = family_pal)
fig_w <- fig_w + labs(x = "Environment",
y = "Relative abundance (% total reads)")
fig_w <- fig_w + theme(plot.margin = margin(0, 0.1, 0, 0.1, unit = "cm")) +
theme(axis.text.x = element_text(angle = 0, hjust = 0.45, vjust = 1)) +
theme(legend.key = element_rect(colour = "black"))
fig_w <- fig_w + guides(
fill = guide_legend(override.aes = list(colour = NULL),
reverse = FALSE,
title.position = "top", nrow = 3))
fig_w <- fig_w + theme(axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(colour = "black",
fill = NA, size = 1))
fig_w
#ggsave("oxstate_by_taxa.png")
#invisible(dev.off())
#### ---- echo=FALSE, warning=FALSE, fig.height=2----------------------------------
knitr::include_graphics("figures/16s-water/bar-norm-v-hypox.png")
#### ---- hyp_v_norm_agg, eval=FALSE------------------------------------------------
hyp_v_norm <- aggregate(meltd_w1$Abundance,
by = list(Rank = meltd_w1[[TRANK]],
Sample = meltd_w1$Sample),
FUN = sum)
hyp_v_norm$x <- round(hyp_v_norm$x, digits = 4)
hyp_v_norm <- spread(hyp_v_norm, Sample, x)
#### ---- sample_summary_table_display2, echo=FALSE---------------------------------
datatable(hyp_v_norm, width = "100%", escape = FALSE,
rownames = FALSE,
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: left;',
'Table: ', htmltools::em('Relative abundance (reads) of normoxic
vs hypoxic samples by dominant taxa.')),
elementId = "wewuidn73q2ceico4vyw",
extensions = 'Buttons', options = list(
scrollX = TRUE,
dom = 'Brti',
buttons = c('copy', 'csv', 'excel'),
pageLength = 9
)
) #%>%
# formatRound(columns=c("Shannon", "InvSimpson"), digits=2) %>%
# formatStyle(columns = colnames(merge_tab2), fontSize = '80%')
#### ---- assign_rankB, echo=FALSE, eval=FALSE--------------------------------------
TRANK <- "Class"
#### ---- calc_avg2B, warning=FALSE, echo=FALSE, eval=FALSE-------------------------
ps_water_tm <- merge_samples(ps_water, "period")
SD_BAR_tm <- merge_samples(sample_data(ps_water_AVG), "period")
#### ---- calc_avg3B, warning=FALSE, echo=FALSE, eval=FALSE-------------------------
mdata_phy_tm <- tax_glom(ps_water_tm, taxrank = TRANK, NArm = FALSE)
mdata_phyrel_tm <- transform_sample_counts(mdata_phy_tm, function(x) x/sum(x))
meltd_tm <- psmelt(mdata_phyrel_tm)
meltd_tm[[TRANK]] <- as.character(meltd_tm[[TRANK]])
#### ---- calc_avg4B, warning=FALSE, echo=FALSE, eval=FALSE-------------------------
means_tm <- ddply(meltd_tm, ~get(TRANK), function(x) c(mean = mean(x$Abundance)))
colnames(means_tm) <- c(TRANK, "mean")
means_tm$mean <- round(means_tm$mean, digits = 5)
taxa_means_tm <- means_tm[order(-means_tm$mean), ] # this orders in decending fashion
taxa_means_tm <- format(taxa_means_tm, scientific = FALSE) # ditch the sci notation
#### ---- calc_avg5B, warning=FALSE, echo=FALSE-------------------------------------
datatable(
taxa_means_tm, rownames = FALSE, width = "100%",
colnames = c(TRANK, "mean"),
caption = htmltools::tags$caption(
style = "caption-side: bottom; text-align: left;",
"Relative abundance by taxonomic rank. "),
elementId = "zswbg7x7f8eqj526xuox",
extensions = "Buttons",
options = list(columnDefs = list(list(
className = "dt-left", targets = "_all")),
dom = "Blfrtip", buttons = c("csv", "copy"),
scrollX = TRUE, scrollCollapse = TRUE, scrollY=TRUE,
pageLength = 9,
scroller=TRUE, lengthMenu = c(9, 25, 60)))
#### ---- calc_avg6B, warning=FALSE, echo=FALSE, eval=FALSE-------------------------
TAXAN <- 8
top_perc_tm <- top_n(taxa_means_tm, n = TAXAN, wt = mean)
top_perc_tm$mean <- round(as.numeric(top_perc_tm$mean), digits = 5)
min_top_perc_tm <- round(as.numeric(min(top_perc_tm$mean)), digits = 5)
top_perc_tm_list <- top_perc_tm[,1]
#### ---- make_other_d_v_a, echo=FALSE, eval=FALSE----------------------------------
Other_tm <- means_tm[means_tm$mean < min_top_perc_tm, ][[TRANK]]
meltd_tm[meltd_tm[[TRANK]] %in% Other_tm, ][[TRANK]] <- "Other"
samp_names_tm <- aggregate(meltd_tm$Abundance, by = list(meltd_tm$Sample), FUN = sum)[, 1]
.e_tm <- environment()
meltd_tm[, TRANK] <- factor(meltd_tm[, TRANK], sort(unique(meltd_tm[, TRANK])))
meltd_tm1 <- meltd_tm[order(meltd_tm[, TRANK]), ]
target <- c("Cyanobacteriia", "Alphaproteobacteria",
"Bacteroidia", "Gammaproteobacteria",
"Thermoplasmata", "Phylum_Marinimicrobia_(SAR406_clade)",
"Acidimicrobiia", "Campylobacteria", "Other")
meltd_tm1[[TRANK]] <- reorder.factor(meltd_tm1[[TRANK]], new.order=target)
#### ---- make_fig_d_v_a, echo=FALSE, layout="l-body-outset", eval=FALSE------------
family_pal <- c("#009E73", #1
"#56B4E9", #2
"#CC79A7", #3
"#0072B2", #4
"#D55E00", #5
"#B6DBFF", #6
"#E69F00", #7
"#F0E442", #8
"#323232", #9
"#7F7F7F")
fig_tm <- ggplot(meltd_tm1,
aes_string(x = "Sample", y = "Abundance",
fill = TRANK, show.legend = FALSE),
ordered = TRUE, xlab = "x-axis label", ylab = "y-axis label")
fig_tm <- fig_tm + geom_bar(stat = "identity",
position = position_stack(reverse = TRUE),
width = 0.95) + coord_flip() +
theme(aspect.ratio = 1/4) + theme(legend.position = "bottom")
fig_tm <- fig_tm + scale_fill_manual(values = family_pal)
fig_tm <- fig_tm + labs(x = "Environment",
y = "Relative abundance (% total reads)",
title = "Abundance of taxa before & after event")
fig_tm <- fig_tm + theme(plot.margin = margin(0, 0.1, 0, 0.1, unit = "cm")) +
theme(axis.text.x = element_text(angle = 0, hjust = 0.45, vjust = 1)) +
theme(legend.key = element_rect(colour = "black"))
fig_tm <- fig_tm + guides(
fill = guide_legend(override.aes = list(colour = NULL),
reverse = FALSE,
title.position = "top", nrow = 3))
fig_tm <- fig_tm + theme(axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(colour = "black",
fill = NA, size = 1))
fig_tm
#ggsave("period_by_taxa.png")
#invisible(dev.off())
#### ---- echo=FALSE, warning=FALSE, fig.height=2, layout="l-body-outset"----------
knitr::include_graphics("figures/16s-water/bar-before-v-after.png")
#### ---- before_v_after1, echo=FALSE, eval=FALSE-----------------------------------
before_v_after <- aggregate(meltd_tm1$Abundance,
by = list(Rank = meltd_tm1[[TRANK]],
Sample = meltd_tm1$Sample),
FUN = sum)
before_v_after$x <- round(before_v_after$x, digits = 4)
before_v_after <- spread(before_v_after, Sample, x)
#### ---- before_v_after2, echo=FALSE-----------------------------------------------
datatable(before_v_after, width = "100%", escape = FALSE,
rownames = FALSE, colnames = c("Crawl Caye (after)",
"Crawl Caye (during)",
"Cayo Roldan (after)",
"Cayo Roldan (during)"),
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: left;',
'Table: ', htmltools::em('Relative abundance (reads) of normoxic
vs hypoxic samples by dominant taxa.')),
elementId = "kqtr8k9xeqyk0sb1s4h4",
extensions = 'Buttons', options = list(
scrollX = TRUE,
dom = 'Brti',
buttons = c('copy', 'csv', 'excel'),
pageLength = 9
)
) #%>%
# formatRound(columns=c("Shannon", "InvSimpson"), digits=2) %>%
# formatStyle(columns = colnames(merge_tab2), fontSize = '80%')
#### ---- echo=FALSE, eval=FALSE---------------------------------------------------
saveRDS(ps_water, "rdata/16s-water/ps_water.rds")
saveRDS(ps_water_o, "rdata/16s-water/ps_water_o.rds")
saveRDS(ps_water_ox, "rdata/16s-water/ps_water_ox.rds")
save.image("rdata/16s-water/16s-water.rdata")
#############################
##
## No 4. Differentially Abundant ASVs
##
#############################
#### ---- setup, include=FALSE------------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
set.seed(0199)
library(phyloseq); packageVersion("phyloseq")
library(DT)
library(ggplot2)
library(Biostrings); packageVersion("Biostrings")
library(dplyr)
library(microbiome)
library(tidyverse)
library(data.table)
library(plyr)
library(stringr)
require(gdata)
library(labdsv)
library(reshape)
library(metacoder)
library(naniar)
options(scipen=999)
knitr::opts_current$get(c(
"cache",
"cache.path",
"cache.rebuild",
"dependson",
"autodep"
))
#### ---- echo=FALSE---------------------------------------------------------------
### NOTE: this should only be run AFTER the workflow is finished.
### This image is created at the END of this workflow and now read back in.
### Basically, this locks everything in place so nothing changes. Only code
### chunkc to display figures and tables are evaluated. Everything else is
### turned off. To actually run this you need to set this chunk to
### eval=FALSE and the rest to eval=TRUE
load("rdata/16s-da-asvs/da_asvs_pipeline.rdata")
#### ---- echo=FALSE, eval=FALSE---------------------------------------------------
remove(list = ls())
ps_water <- readRDS("rdata/16s-water/ps_water.rds")
ps_water_o <- readRDS("rdata/16s-water/ps_water_o.rds")
ps_water_ox <- readRDS("rdata/16s-water/ps_water_ox.rds")
load("rdata/16s-water/16s-water.rdata")
#### ---- idval_analysis, results='hide', eval=FALSE--------------------------------
set.seed(119)
#rm(class, order, phylum, kingdom)
#ls(all.names = TRUE)
water_seq_table <- data.frame(otu_table(ps_water))
# Delete columns when sum == 0 Should not be any
water_seq_table <- water_seq_table[, which(colSums(water_seq_table) != 0)]
water_seq_table <- cbind(oxstate = c("normoxic","normoxic",
"normoxic","normoxic",
"normoxic","hypoxic",
"hypoxic","hypoxic"),
water_seq_table)
iva <- indval(water_seq_table[,-1], water_seq_table[,1])
gr <- iva$maxcls[iva$pval <= 0.05]
iv <- iva$indcls[iva$pval <= 0.05]
pv <- iva$pval[iva$pval <= 0.05]
fr <- apply(water_seq_table[,-1] > 0, 2, sum)[iva$pval <= 0.05]
indvalsummary <- data.frame(group = gr, indval = iv,
pvalue = pv, freq = fr)
indvalsummary <- indvalsummary[order(indvalsummary$group,
-indvalsummary$indval),]
water_tax_table <- data.frame(tax_table(ps_water))
indvalsummary_tax <- merge(indvalsummary, water_tax_table,
by = "row.names",
all = TRUE)
indvalsummary_tax <- indvalsummary_tax[!(is.na(indvalsummary_tax$group)),]
lapply(indvalsummary_tax, class)
class(indvalsummary_tax$group) = "character"
indvalsummary_tax$group <- ifelse(indvalsummary_tax$group == "1",
as.character("hypoxic"),
as.character("normoxic"))
#### ---- idval_tables, echo=FALSE, eval=FALSE--------------------------------------
water_seq_table_t <- water_seq_table
water_seq_table_t[,1] <- NULL
water_seq_table_t <- as.data.frame(t(water_seq_table_t))
water_seq_table_t$reads <- rowSums(water_seq_table_t)
water_seq_table_t$norm <- rowSums(water_seq_table_t[1:5])
water_seq_table_t$hypo <- rowSums(water_seq_table_t[6:8])
water_seq_table_t[1:8] <- NULL
water_seq_table_t <- tibble::rownames_to_column(water_seq_table_t, "ASV")
names(indvalsummary_tax)[1] <- "ASV"
full_indval_sum <- merge(water_seq_table_t, indvalsummary_tax,
by = "ASV",
all = FALSE)
indvalsummary <- tibble::rownames_to_column(indvalsummary, "ASV")
write.table(full_indval_sum, "tables/16s-da-asvs/indval_tax.txt",
row.names = FALSE, sep = "\t", quote = FALSE)
write.table(indvalsummary, "tables/16s-da-asvs/indval_summary.txt",
row.names = FALSE, sep = "\t", quote = FALSE)
#### ---- scg_tax, echo=FALSE, layout="l-page"--------------------------------------
full_indval_sum <- read.table("tables/16s-da-asvs/indval_tax.txt", header = TRUE)
full_indval_sum_noseq <- full_indval_sum
full_indval_sum_noseq[,c(15:16)] <- NULL
datatable(colnames = c("ASV", "reads", "norm", "hypo",
"group", "indval", "pval", "freq",
"Kingdom", "Phylum", "Class", "Order",
"Family", "Genus"),
full_indval_sum_noseq, rownames = FALSE, autoHideNavigation = TRUE,
width = "100%", elementId = "twykw9v4dzmp1lymga92",
caption = htmltools::tags$caption(
style = "caption-side: bottom; text-align: left;",
"Differentially Abundant ASVs assessed by
Indicator Species Analysis (ISA)."),
extensions = c("Buttons", "FixedColumns"),
options = list(autoWidth = TRUE,
columnDefs = list(list(className = "dt-left", targets = 0)),
dom = "Blfrtip",
buttons = list(list(
extend = 'collection',
buttons = c('csv', 'excel', 'copy'),
text = 'Download')),
pageLength = 10, lengthMenu = c(5, 10, 50, 160),
scrollX = TRUE, scrollCollapse = FALSE, scrollY=FALSE, paging=TRUE,
fixedColumns = list(leftColumns = 1, rightColumns = 0))) %>%
formatRound(columns=c("indval", "pvalue"), digits = 3) %>%
formatStyle(columns = colnames(full_indval_sum_noseq), fontSize = '80%')
#### ---- export_da_fasta, echo=FALSE, eval=FALSE-----------------------------------
# Create fasta file from tax_table
table2format2 <- full_indval_sum
table2format2_t <- table2format2[table2format2$reads >= 100 ,]
#retain only the column with the sequences
table2format_trim2 <- table2format2[, 16:15]
table2format_trim2_t <- table2format2_t[, 16:15]
#format fasta
table2format_trim2$ASV_ID <- sub("ASV", ">ASV", table2format_trim2$ASV_ID)
table2format_trim2_t$ASV_ID <- sub("ASV", ">ASV", table2format_trim2_t$ASV_ID)
write.table(table2format_trim2, "tables/16s-da-asvs/idval_asv_all.fasta",
sep = "\n", col.names = FALSE, row.names = FALSE,
quote = FALSE, fileEncoding = "UTF-8")
write.table(table2format_trim2_t, "tables/16s-da-asvs/idval_asv_100.fasta",
sep = "\n", col.names = FALSE, row.names = FALSE,
quote = FALSE, fileEncoding = "UTF-8")
#### ---- echo=FALSE, eval=FALSE---------------------------------------------------
full_tally <- table(indvalsummary_tax$group)
trim_tally <- table(table2format2$group)
#### ---- eval=FALSE---------------------------------------------------------------
ps_water_100 <- prune_taxa(taxa_sums(ps_water) > 100, ps_water)
ps_water_100
#### ---- echo=FALSE---------------------------------------------------------------
ps_water_100
#### ---- eval=FALSE---------------------------------------------------------------
data_tab <- as.data.frame(t(otu_table(ps_water_100)))
data_tab <- data_tab %>% rownames_to_column("Group")
write.table(data_tab, "16s-anvio/data.txt", sep = "\t",
quote = FALSE, row.names = FALSE)
#### ---- eval=FALSE---------------------------------------------------------------
data_tab2 <- as.data.frame(t(otu_table(ps_water_100)))
total_reads2 <- cbind(data_tab2, total_reads = rowSums(data_tab2))
total_reads2[1:8] <- NULL
tax_tab2 <- as.data.frame(tax_table(ps_water_100))
tax_tab2[7] <- NULL
total_reads2 <- tibble::rownames_to_column(total_reads2, "Group")
tax_tab2 <- tibble::rownames_to_column(tax_tab2, "Group")
merge_tabX <- merge(total_reads2, tax_tab2,
by = "Group",
all = TRUE)
drop_c <- c("reads", "norm", "hypo", "Kingdom",
"Phylum", "Class", "Order", "Family", "Genus")
idval2 <- full_indval_sum_noseq[ , !(names(full_indval_sum_noseq) %in% drop_c)]
colnames(idval2)[1:2] <- c("Group", "enriched")
additional_layers <- merge(idval2, merge_tabX,
by = "Group",
all.y = TRUE)
write.table(additional_layers, "16s-anvio/additional_layers.txt",
sep = "\t", quote = FALSE, row.names = FALSE, na = "")
#### ---- eval=FALSE---------------------------------------------------------------
additional_view <- merge_tab2
additional_view[2] <- NULL
colnames(additional_view) <- c("id", "site", "oxstate", "period", "no_reads",
"no_asvs", "Shannon", "InvSimpson")
write.table(additional_view, "16s-anvio/additional_views.txt",
sep = "\t", quote = FALSE, row.names = FALSE, na = "")
#### ---- eval=FALSE---------------------------------------------------------------
# Make the table
anvio_taxa <- ps_water
glom_rank <- tax_glom(anvio_taxa, taxrank = 'Class')
melt_rank <- psmelt(glom_rank)
melt_rank$Class <- as.character(melt_rank$Class)
rank_abundance <- aggregate(Abundance~Sample+Class, melt_rank, FUN = sum)
rank_abundance <- as.data.frame(cast(rank_abundance, Sample ~ Class))
#rank_abundance <- as.data.frame(rank_abundance)
rank_abundance <- tibble::remove_rownames(rank_abundance)
rank_abundance <- tibble::column_to_rownames(rank_abundance, "Sample")
# Reorder table column by summ
layers <- rank_abundance[,names(sort(colSums(rank_abundance), decreasing = TRUE))]
# Add the prefix
layers <- layers %>% dplyr::rename_all(function(x) paste0("t_class!", x))
layers <- tibble::rownames_to_column (layers, "taxon")
# save the table
write.table(layers, "16s-anvio/tax_layers.txt",
sep = "\t", quote = FALSE, row.names = FALSE, na = "")
#### ---- eval=FALSE---------------------------------------------------------------
sam_tree <- read_file("16s-anvio/sample.tre")
sam_tree <- gsub("[\r\n]", "", sam_tree)
item_name <- c("euc_ward")
data_type <- c("newick")
data_value <- c(sam_tree)
sam_tre_tab <- data.frame(item_name, data_type, data_value)
library(janitor)
sam_tre_tab %>% remove_empty("rows")
write.table(sam_tre_tab, "16s-anvio/sample_tree_tab.txt",
sep = "\t", quote = FALSE, row.names = FALSE, na = "")
#### ---- export_fasta_anvio, eval=FALSE--------------------------------------------
# Create fasta file from tax_table
anvio_tab <- tax_table(ps_water_100)
#retain only the column with the sequences
anvio_tab_trim <- anvio_tab[, 7]
anvio_tab_trim_df <- data.frame(row.names(anvio_tab_trim),
anvio_tab_trim)
colnames(anvio_tab_trim_df) <- c("ASV_ID", "ASV_SEQ")
#format fasta
anvio_tab_trim_df$ASV_ID <- sub("ASV", ">ASV", anvio_tab_trim_df$ASV_ID)
write.table(anvio_tab_trim_df, "16s-anvio/anvio.fasta",
sep = "\r", col.names = FALSE, row.names = FALSE,
quote = FALSE, fileEncoding = "UTF-8")
#### ---- echo=FALSE, warning=FALSE, fig.height=2----------------------------------
knitr::include_graphics("figures/16s-da-asvs/before.png")
#### ---- echo=FALSE, layout="l-page", warning=FALSE, fig.height=3-----------------
knitr::include_graphics("figures/16s-da-asvs/after.png")
#### ---- echo=FALSE, warning=FALSE, fig.height=2----------------------------------
knitr::include_graphics("figures/16s-da-asvs/cyano_example.png")
#### ---- eval=FALSE---------------------------------------------------------------
ps_water_mc <- ps_water
#### ---- add_var_cats, echo=FALSE, eval=FALSE--------------------------------------
## USE This code if you want to start with NA taxa before rename
## First, we need to start with the original phyloseq object (`ps_water_o`) before we modified it in any way. We will copy it then add in metadata (oxstate and period) as before. This way we can have a phyloseq object specifically for metacoder.
ps_water_mc <- ps_water_o
sample_data(ps_water_mc)$oxstate <-
c("normoxic", "normoxic", "normoxic", "normoxic",
"normoxic", "hypoxic", "hypoxic", "hypoxic")
sample_data(ps_water_mc)$period <-
c("cc_after", "cc_during", "cc_during", "cc_during",
"cr_after", "cr_during", "cr_during", "cr_during")
#### ---- eval=FALSE---------------------------------------------------------------
ps_water_mc_100 <- prune_taxa(taxa_sums(ps_water_mc) > 100, ps_water_mc)
ps_water_mc_10 <- prune_taxa(taxa_sums(ps_water_mc) > 10, ps_water_mc)
total_reads_ps_water <- sum(readcount(ps_water_mc))
total_reads_ps_water_100 <- sum(readcount(ps_water_mc_100))
total_reads_ps_water_10 <- sum(readcount(ps_water_mc_10))
ps_obj <- ps_water_mc_100
#### ---- eval=FALSE---------------------------------------------------------------
#summarize_phyloseq(ps_water_o)
#summarize_phyloseq(ps_water_mc)
tax_tab1 <- as.data.frame(tax_table(ps_obj))
tax_tab1 <- tibble::rownames_to_column(tax_tab1, "otu_id")
asv_tab1 <- as.data.frame(t(otu_table(ps_obj)))
asv_tab1 <- tibble::rownames_to_column(asv_tab1, "otu_id")
sam_tab1 <- data.frame(sample_data(ps_obj))
sam_tab1[1] <- NULL
da_samp <- tibble::rownames_to_column(sam_tab1, "sample_id")
water_mc <- merge(asv_tab1, tax_tab1, by="otu_id")
water_mc$ASV_SEQ <- NULL
#### ---- echo=FALSE, eval=FALSE---------------------------------------------------
#This code replaces all Genus names with `NA`. Comment out to keep names.
# but it messes things up
#gen_names <- water_mc$Genus
#water_mc <- water_mc %>% replace_with_na_at(.vars = "Genus",
# condition = ~.x %in% gen_names)
#### ---- eval=FALSE---------------------------------------------------------------
obj <- parse_tax_data(water_mc, class_cols = c("Kingdom", "Phylum",
"Class", "Order",
"Family", "Genus", "ASV_ID" ))
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data",
cols = da_samp$sample_id)
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund",
groups = da_samp$oxstate,
cols = da_samp$sample_id)
#### ---- eval=FALSE---------------------------------------------------------------
obj$data$diff_table <- compare_groups(obj,
data = "tax_abund",
cols = da_samp$sample_id,
groups = da_samp$oxstate)
#### ---- echo=FALSE, eval=FALSE----------------------------------------------------
### THIS is optional if you want to remove low abund taxa AFTER running the comparative stats
obj_t <- obj
obj_t$data$tax_data <- zero_low_counts(obj_t, data = "tax_data", min_count = 100, use_total = TRUE)
no_reads <- rowSums(obj_t$data$tax_data[, da_samp$sample_id]) == 0
sum(no_reads)
obj_t <- filter_obs(obj_t, data = "tax_data", ! no_reads, drop_taxa = TRUE)
#### ---- echo=FALSE, eval=FALSE----------------------------------------------------
### THIS is optional if you want to run fdr
obj$data$diff_table$wilcox_p_value <- p.adjust(obj$data$diff_table$wilcox_p_value,
method = "fdr")
range(obj$data$diff_table$wilcox_p_value, finite = TRUE)
range(obj$data$diff_table$log2_median_ratio[is.finite(obj$data$diff_table$log2_median_ratio)])
#### ---- eval=FALSE---------------------------------------------------------------
#set.seed(1999)
obj %>%
filter_taxa(taxon_names %in% c("Bacteria"), subtaxa = TRUE) %>%
# filter_taxa(taxon_ranks == "o", supertaxa = TRUE) %>% # subset to the order rank
# filter_taxa(taxon_names %in% c("Proteobacteria", "Bacteroidia", "Archaea"), subtaxa = TRUE, invert = TRUE) # to remove taxa
heat_tree(
node_label = taxon_names,
node_size = n_obs,
# node_size_range = c(0.01, 0.05),
node_label_size_range = c(0.008, 0.04),
node_color = log2_median_ratio,
node_color_interval = c(-5.5, 4),
edge_color_interval = c(-5.5, 4),
node_color_trans = "area",
node_color_range = c("#D55E00", "gray", "#0072B2"),
node_size_axis_label = "OTU count",
node_color_axis_label = "Log 2 median ratio",
layout = "da",
initial_layout = "re",
overlap_avoidance = 2,
output_file = "figures/16s-da-asvs/differential_heat_tree.png")
#### ---- echo=FALSE, warning=FALSE, fig.height=2, layout='l-body-outset'----------
knitr::include_graphics("figures/16s-da-asvs/differential_heat_tree.png")
#### ---- eval=FALSE----------------------------------------------------------------
## This is for the example above
#set.seed(10)
obj %>%
filter_taxa(taxon_names %in% c("Cyanobacteria"),
subtaxa = TRUE) %>%
heat_tree(node_label = taxon_names,
node_size = n_obs,
node_size_range = c(0.02, 0.03),
node_label_size_range = c(0.02, 0.04),
node_color = log2_median_ratio,
node_color_interval = c(-5, 5),
edge_color_interval = c(-5, 5),
node_color_range = c("#D55E00", "gray", "#0072B2"),
tree_label = taxon_names,
initial_layout = "re", layout = "da",
node_color_axis_label = "Sum of root reads",
node_size_axis_label = "Number of OTUs",
output_file = "figures/16s-da-asvs/cyano_example.svg")
#### ---- eval=FALSE---------------------------------------------------------------
#set.seed(10)
obj %>%
filter_taxa(taxon_names %in% c("Proteobacteria", "Bacteroidia"),
subtaxa = TRUE) %>%
heat_tree(node_label = taxon_names,
node_size = n_obs,
node_color = log2_median_ratio,
node_color_range = c("#D55E00", "gray", "#0072B2"),
node_color_interval = c(-3, 3),
tree_label = taxon_names,
initial_layout = "re", layout = "da",
node_color_axis_label = "Sum of root reads",
node_size_axis_label = "Number of OTUs",
output_file = "figures/16s-da-asvs/by_major_taxa.png")
#### ---- echo=FALSE, warning=FALSE, fig.height=2, layout='l-body-outset'----------
knitr::include_graphics("figures/16s-da-asvs/by_major_taxa.png")
#### ---- eval=FALSE---------------------------------------------------------------
set.seed(1)
obj$data$diff_table2 <- compare_groups(obj,
data = "tax_abund",
cols = da_samp$sample_id, # What columns of sample data to use
groups = da_samp$period) # What category each sample is assigned to
heat_tree_matrix(obj,
data = "diff_table2",
node_size = n_obs, # n_obs is a function that calculates, in this case, the number of OTUs per taxon
node_label = taxon_names,
node_color = log2_median_ratio, # A column from `obj$data$diff_table`
node_color_range = diverging_palette(), # The built-in palette for diverging data
node_color_trans = "linear", # The default is scaled by circle area
node_color_interval = c(-3, 3), # The range of `log2_median_ratio` to display
edge_color_interval = c(-3, 3), # The range of `log2_median_ratio` to display
node_size_axis_label = "Number of OTUs",
node_color_axis_label = "Log2 ratio median proportions",
layout = "davidson-harel", # The primary layout algorithm
initial_layout = "reingold-tilford", # The layout algorithm that initializes node locations
output_file = "figures/16s-da-asvs/differential_heat_tree2.png") # Saves the plot as a png file
#### ---- echo=FALSE, warning=FALSE, fig.height=2, layout='l-body-outset'----------
knitr::include_graphics("figures/16s-da-asvs/differential_heat_tree2.png")
#### ---- echo=FALSE, eval=FALSE---------------------------------------------------
save.image("rdata/16s-da-asvs/da_asvs_pipeline.rdata")
#### ---- calc_avg2, warning=FALSE, echo=FALSE, eval=FALSE--------------------------
### Script for DESeq NOT FINISHED
library(DESeq2)
ds2 <- phyloseq_to_deseq2(ps_water, ~ oxstate)
dds <- DESeq(ds2)
res <- results(dds)
deseq.results <- as.data.frame(res)
df <- deseq.results
df$taxon <- rownames(df)
df <- df %>% arrange(log2FoldChange, padj)
write.table(res, "res.txt")
###########################
ps_water_avg <- transform_sample_counts(ps_water, function(x) x/sum(x))
t(otu_table(ps_water_avg))
pseq.compositional <- microbiome::transform(ps_water, "compositional")
t(otu_table(pseq.compositional))
mergedGP <- merge_samples(ps_water_avg, "oxstate")
SD <- merge_samples(sample_data(ps_water_avg), "oxstate")
OTUnames10 = names(sort(taxa_sums(ps_water), TRUE)[1:100])
GP10 = prune_taxa(OTUnames10, ps_water)
t(otu_table(GP10))
mGP10 = prune_taxa(OTUnames10, mergedGP)
ocean_samples = sample_names(subset(sample_data(GP), SampleType=="Ocean"))
print(ocean_samples)
otu_table(mergedGP)[, hypoxic]
sample_names(mergedGP)
identical(SD, sample_data(mergedGP))
otu.relative <- abundances(ps_water, "compositional")
ps_water_ox <- merge_samples(ps_water_AVG, "oxstate")
SD_BAR_w <- merge_samples(sample_data(ps_water_AVG), "oxstate")
head(otu_table(ps_water_ox))
ox_otu <- as.data.frame(t(otu_table(ps_water_ox)))
ox_tax <- as.data.frame(tax_table(ps_water_ox))
ox_asv_tab <- merge(ox_otu, ox_tax,by = 0, all = TRUE)
ox_asv_tab <- ox_asv_tab[,-1]
ox_asv_tab <- ox_asv_tab[,c(9,1,2,3,4,5,6,7,8)]
transform_sample_counts(ps_water_ox, function(x) x/sum(x))