# Please note this is all the raw R code pulled from all the `.Rmd` files. # This code is for the 16s rRNA data only since the metagenomic analyses are a mix # of anvio commands and R-code # Use at your own risk as this has not been tested independent of the Rmarkdown # workflows. In other words, the code works in the Rmarkdown format but the # complete pipeline has not been tested using just this code. I used `knitr::purl()` # to pull out the R code from the Rmarkdown file. I did this for you in case you # wanted the code and not hear me drone on about colors or zoomable figures. # Commands that are commented out are things I tried that I could never get to work. # Any line that starts like this: `## ----` is the code chuck name and details. ############################# ## ## Front Page ## ############################# #### ---- setup, include=FALSE------------------------------------------------------ knitr::opts_chunk$set(echo = FALSE) library("tm") library("SnowballC") library("wordcloud") library("RColorBrewer") library("wordcloud2") library("webshot") library("htmlwidgets") library(dplyr) library(leaflet) library(htmlwidgets) #### ---- echo=FALSE, warning=FALSE, layout='l-screen'----------------------------- knitr::include_graphics("figures/index/banner.png") #### ---- layout='l-screen-inset shaded', fig.height=5.5--------------------------- #stri_icon <- makeIcon( # iconUrl = "assets/stri_logo.png", # iconWidth = 38, iconHeight = 38 # iconAnchorX = 22, iconAnchorY = 94, #) leaflet() %>% setView(-82.235052, 9.277137, zoom = 12) %>% addTiles() %>% addScaleBar(position = "topright") %>% # addMiniMap(position = "topright", zoomLevelOffset = -5) %>% addCircleMarkers(lng = c(-82.334720, -82.131119), lat = c(9.219640, 9.248433), radius = 22.5, label = c("Cayo Roldan (impacted)", "Coral Caye (control)"), labelOptions = labelOptions(textOnly = TRUE, noHide = TRUE, direction = "bottom", offset = c(6, 12), style = list("font-size" = "14px" , "font-style" = "bold")), color = c("#141414", "#141414"), fillColor = c("#D55E00", "#0072B2"), stroke = TRUE, fillOpacity = 0.9, weight = 10) %>% addCircleMarkers(lng = -82.256463, lat = 9.352402, label = "STRI", labelOptions = labelOptions(textOnly = TRUE, noHide = TRUE, direction = "left", offset = c(-20, 1), style = list("font-size" = "20px" , "font-style" = "bold")), color = "#0072B2", fillColor = "#F0E442", stroke = TRUE, fillOpacity = 0.95, weight = 5, opacity = 0.95) #### ---- echo=FALSE, eval=FALSE--------------------------------------------------- filePath <- "files/main.txt" text <- readLines(filePath) docs <- Corpus(VectorSource(text)) inspect(docs) toSpace <- content_transformer(function (x , pattern ) gsub(pattern, " ", x)) docs <- tm_map(docs, toSpace, "/") docs <- tm_map(docs, toSpace, "@") docs <- tm_map(docs, toSpace, "\\|") #### ---- echo=FALSE, eval=FALSE--------------------------------------------------- # Convert the text to lower case docs <- tm_map(docs, content_transformer(tolower)) # Remove numbers docs <- tm_map(docs, removeNumbers) # Remove english common stopwords docs <- tm_map(docs, removeWords, stopwords("english")) # Remove your own stop word # specify your stopwords as a character vector docs <- tm_map(docs, removeWords, c("used", "based", "samples", "using", "sample", "one", "two", "site", "within", "fig")) # Remove punctuations docs <- tm_map(docs, removePunctuation) # Eliminate extra white spaces docs <- tm_map(docs, stripWhitespace) # Text stemming docs <- tm_map(docs, stemDocument) #### ---- echo=FALSE, eval=FALSE--------------------------------------------------- dtm <- TermDocumentMatrix(docs) m <- as.matrix(dtm) v <- sort(rowSums(m), decreasing=TRUE) d <- data.frame(word = names(v),freq = v) head(d, 200) #### ---- echo=FALSE, eval=FALSE--------------------------------------------------- set.seed(1234) d1 <- d %>% filter(freq > 1) #wordcloud(words = d$word, freq = d$freq, min.freq = 1, scale=c(2,.5), # max.words=200, random.order=FALSE, rot.per=0.35, # colors=brewer.pal(8, "Dark2")) wc <- wordcloud2(data = d1, size = 1.1, color = 'random-dark', fontFamily = "serif", gridSize = 0) saveWidget(wc, "tmp.html", selfcontained = F) webshot("tmp.html", "wordcloud.png", delay = 10, vwidth = 1080, vheight = 480) #### ---- echo=FALSE, warning=FALSE, fig.height=1.5, layout='l-screen'------------- knitr::include_graphics("figures/index/wordcloud.png") ############################# ## ## Site Overview ## ############################# #### ---- setup, include=FALSE------------------------------------------------------ knitr::opts_chunk$set(echo = TRUE) library(dplyr) library(DT) library(ggplot2) library(scales) library(magick) library(gridExtra) library(grid) options(scipen=999) #### ---- color_121, layout="l-body-outset", comment='', include=FALSE------------- # Wong palette wong_pal <- c("#009E73", "#D55E00", "#F0E442", "#CC79A7", "#56B4E9", "#E69F00", "#0072B2", "#7F7F7F", "#000000") palette(wong_pal) gs1 <- lapply(wong_pal, function(ii, wong_pal) grobTree( rectGrob( gp = gpar(fill = ii)), textGrob(ii, gp = gpar(col = "white", fontsize = 10)))) gs1 <- grid.arrange(grobs = gs1, ncol = 3, top = "8-COLOR PALETTE", right = "") # 12 color palette twelve_pal <- c("#323232", "#BF3465", "#50B29E", "#D9D9D9", "#731683", "#1C6CCC", "#21BCFF", "#DFA5E5", "#874310", "#DB6D1B", "#B8CE17", "#F4E345") palette(twelve_pal) gs2 <- lapply(twelve_pal, function(ii, twelve_pal) grobTree( rectGrob( gp = gpar(fill = ii)), textGrob(ii, gp = gpar(col = "white", fontsize = 10)))) gs2 <- grid.arrange(grobs = gs2, ncol = 4, top = "12-COLOR PALETTE", left = "") #### ---- color_8, layout="l-body-outset", comment='', echo=FALSE, fig.height=3---- grid.arrange(gs1, gs2, ncol = 2) ############################# ## ## Field Analyses ## ############################# #### ---- setup, include = FALSE---------------------------------------------------- knitr::opts_chunk$set(echo = TRUE) rm(list = ls()) library(lme4) library(lmerTest) library(car) library(rcompanion) library(MASS) library(emmeans) library(nlme) library(ggplot2) library(plyr) library(RColorBrewer) library(wesanderson) library(reshape2) # restructuring the data library(cowplot) library(scales) library(tidyr) library(dplyr) library(patchwork) #### ---- -------------------------------------------------------------------------- data <- read.table("tables/field/coralhypoxia.csv", header = T, sep = ",") attach(data) #### ---- -------------------------------------------------------------------------- plotNormalHistogram(yield, main = "PAM", xlab = "Yield") #### ---- -------------------------------------------------------------------------- lmepam <- lmer(yield ~ site + (1|colony), REML = FALSE, na.action = na.exclude) res1 <- resid(lmepam) #### ---- eval=FALSE, include=FALSE------------------------------------------------ hist(res1, main = "PAM", xlab = "Residuals") plotNormalHistogram(res1) #### ---- -------------------------------------------------------------------------- plotNormalHistogram(res1, main = "PAM", xlab = "Residuals") #### ---- -------------------------------------------------------------------------- pam.shapiro <- shapiro.test(res1) print(pam.shapiro) #### ---- -------------------------------------------------------------------------- qqnorm(res1, ylab = "Sample Quantiles for PAM") qqline(res1, col = "red") #### ---- -------------------------------------------------------------------------- leveneTest(yield ~ as.factor(site), data = data) bartlett.test(x = yield, g = site) #### ---- -------------------------------------------------------------------------- pam1 <- lmer(yield ~ site + (1|colony/frag), data = data, REML = FALSE, na.action = na.exclude) ranova(pam1) #### ---- -------------------------------------------------------------------------- pam2 <- lmer(yield ~ site + (1|colony), data = data, REML = FALSE, na.action = na.exclude) ranova(pam2) #### ---- -------------------------------------------------------------------------- mod.AIC <- AIC(pam1, pam2) mod.AIC #### ---- -------------------------------------------------------------------------- finalpam <- lmer(yield ~ site + (1|colony), data = data, REML = TRUE, na.action = na.exclude) #### ---- -------------------------------------------------------------------------- finalrandpam <- ranova(finalpam) pam.table <- anova(finalpam, type = 2) #### ---- -------------------------------------------------------------------------- pam.table #### ---- -------------------------------------------------------------------------- finalrandpam #### ---- -------------------------------------------------------------------------- attach(data) plotNormalHistogram(zoox, main = "Zooxanthellae", xlab = "Counts") #### ---- -------------------------------------------------------------------------- lmezoox <- lmer(zoox ~ site + (1|colony), REML = FALSE, na.action = na.exclude) res2 <- resid(lmezoox) #### ---- include=FALSE, eval = FALSE---------------------------------------------- hist(res2, main = "Zoox", xlab = "Residuals") plotNormalHistogram(res2, main = "Zooxanthellae", xlab = "Residuals") #### ---- -------------------------------------------------------------------------- plotNormalHistogram(res2, main = "Zooxanthellae", xlab = "Residuals") #### ---- -------------------------------------------------------------------------- zoox.shapiro <- shapiro.test(res2) #runs a normality test on residuals print(zoox.shapiro) # null = normally distrubuted (P<0.05 = non-normal #### ---- -------------------------------------------------------------------------- qqnorm(res2, ylab = "Sample Quantiles for Zoox") qqline(res2, col = "red") #### ---- -------------------------------------------------------------------------- leveneTest(zoox~ as.factor(site),data = data) # p = 0.0007486 #### ---- -------------------------------------------------------------------------- data$zoox_log = log(zoox) attach(data) colnames(data) #### ---- -------------------------------------------------------------------------- plotNormalHistogram(zoox_log, main = "Zooxanthellae", xlab = "log (counts)") #### ---- -------------------------------------------------------------------------- lmezoox_log <- lmer(zoox_log ~ site + (1|colony), REML = FALSE, na.action = na.exclude) res3 <- resid(lmezoox_log) #### ---- include=FALSE, eval = FALSE---------------------------------------------- hist(res3, main = "zoox log", xlab = "Residuals") plotNormalHistogram(res3, main = "Zooxanthellae log", xlab = "Residuals") #### ---- -------------------------------------------------------------------------- plotNormalHistogram(res3, main = "Zooxanthellae log", xlab = "Residuals") #### ---- -------------------------------------------------------------------------- zooxlog.shapiro <- shapiro.test(res3) print(zooxlog.shapiro) #### ---- -------------------------------------------------------------------------- qqnorm(res3, ylab = "Sample Quantiles for Zooxlog") qqline(res3, col = "red") #### ---- -------------------------------------------------------------------------- leveneTest(zoox_log ~ as.factor(site), data = data) #### ---- -------------------------------------------------------------------------- zoox1 <- lmer(zoox_log ~ site + (1|colony/frag), data = data, REML = FALSE, na.action = na.exclude) #AIC 72.8756 ranova(zoox1) #### ---- -------------------------------------------------------------------------- zoox2 <- lmer(zoox_log ~ site + (1|colony), data = data, REML = FALSE, na.action = na.exclude) ranova(zoox2) mod.AIC <- AIC(zoox1, zoox2) mod.AIC #### ---- -------------------------------------------------------------------------- finalzoox <- lmer(zoox_log ~ site + (1|colony), data = data, REML = TRUE, na.action = na.exclude) #### ---- -------------------------------------------------------------------------- finalrandzoox <- ranova(finalzoox) zoox.table <- anova(finalzoox, type = 2) #ANOVA table zoox.table finalrandzoox #### ---- -------------------------------------------------------------------------- plotNormalHistogram(chla, main = "Chlorophyll a", xlab = "Chlorophyll") #### ---- -------------------------------------------------------------------------- lmechla <- lmer(chla ~ site + (1|colony), REML = FALSE, na.action = na.exclude) res4 <- resid(lmechla) #### ---- include=FALSE, eval=FALSE------------------------------------------------ hist(res4, main = "Chla", xlab = "Residuals") plotNormalHistogram(res4, main = "Chlorophyll a", xlab = "Residuals") #### ---- -------------------------------------------------------------------------- plotNormalHistogram(res4, main = "Chlorophyll a", xlab = "Residuals") #### ---- -------------------------------------------------------------------------- chla.shapiro <- shapiro.test(res4) print(chla.shapiro) #### ---- -------------------------------------------------------------------------- qqnorm(res4, ylab = "Sample Quantiles for Zoox") qqline(res4, col = "red") #### ---- -------------------------------------------------------------------------- leveneTest(chla ~ as.factor(site), data = data) # p = 0.01606 #### ---- -------------------------------------------------------------------------- data$chla_log = log(chla) #### ---- -------------------------------------------------------------------------- head(data) attach(data) colnames(data) #### ---- -------------------------------------------------------------------------- plotNormalHistogram(chla_log, main = "Chlorophyll a", xlab = "log (Chlorophyll)") #### ---- -------------------------------------------------------------------------- lmechla_log <- lmer(chla_log ~ site + (1|colony), REML = FALSE, na.action = na.exclude) res7 <- resid(lmechla_log) #### ---- include=FALSE, eval = FALSE---------------------------------------------- hist(res7, main = "chla log", xlab = "Residuals") plotNormalHistogram(res7, main = "Chlorophyll a", xlab = "Residuals") #### ---- -------------------------------------------------------------------------- plotNormalHistogram(res7, main = "Chlorophyll a", xlab = "Residuals") #### ---- -------------------------------------------------------------------------- chlalog.shapiro <- shapiro.test(res7) #runs a normality test on residuals print(chlalog.shapiro) # null = normally distrubuted (P<0.05 = non-normal #### ---- -------------------------------------------------------------------------- qqnorm(res7, ylab = "Sample Quantiles for chlalog") qqline(res7, col = "red") #### ---- -------------------------------------------------------------------------- leveneTest(chla_log ~ as.factor(site), data = data) # p = 0.7487 #### ---- -------------------------------------------------------------------------- chla_log1 <- lmer(chla_log ~ site+(1|colony/frag), data = data, REML = FALSE, na.action = na.exclude) #AIC 55.11567 ranova(chla_log1) #### ---- -------------------------------------------------------------------------- chla_log2 <- lmer(chla_log ~ site + (1|colony), data = data, REML = FALSE, na.action = na.exclude) ranova(chla_log2) mod.AIC <- AIC(chla_log1, chla_log2) mod.AIC #### ---- -------------------------------------------------------------------------- finalchla_log <- lmer(chla_log ~ site + (1|colony), data = data, REML = TRUE, na.action = na.exclude) #### ---- -------------------------------------------------------------------------- finalchla_logrand <- ranova(finalchla_log) chla_log.table <- anova(finalchla_log, type = 2) #ANOVA table chla_log.table finalchla_logrand #### ---- -------------------------------------------------------------------------- plotNormalHistogram(chlc, main = "Chlorophyll c", xlab = "Chlorophyll") #### ---- -------------------------------------------------------------------------- lmechlc <- lmer(chlc ~ site + (1|colony), REML = FALSE, na.action = na.exclude) res5 <- resid(lmechlc) #### ---- eval=FALSE, include=FALSE------------------------------------------------ hist(res5,main = "Chlorophyll c", xlab = "Residuals") plotNormalHistogram(res5, main = "Chlorophyll c", xlab = "Residuals") #### ---- -------------------------------------------------------------------------- plotNormalHistogram(res5, main = "Chlorophyll c", xlab = "Residuals") #### ---- -------------------------------------------------------------------------- chlc.shapiro <- shapiro.test(res5) print(chlc.shapiro) #### ---- -------------------------------------------------------------------------- qqnorm(res5, ylab = "Sample Quantiles for Zoox") qqline(res5, col = "red") #### ---- -------------------------------------------------------------------------- leveneTest(chlc ~ as.factor(site), data = data) # p = 0.01316 #### ---- -------------------------------------------------------------------------- data$chlc_log <- log(chlc) attach(data) colnames(data) #### ---- -------------------------------------------------------------------------- plotNormalHistogram(data$chlc_log, main = "Chlorophyll c (log)", xlab = "log (Chlorophyll)") #### ---- -------------------------------------------------------------------------- lmechlc_log <- lmer(chlc_log ~ site + (1|colony), REML = FALSE, na.action = na.exclude) res6 <- resid(lmechlc_log) #### ---- include=FALSE, eval=FALSE------------------------------------------------ hist(res6, main = "Chlorophyll c log", xlab = "Residuals") plotNormalHistogram(res6, main = "Chlorophyll c log", xlab = "Residuals") #### ---- -------------------------------------------------------------------------- plotNormalHistogram(res6, main = "Chlorophyll c (log)", xlab = "Residuals") #### ---- -------------------------------------------------------------------------- chlclog.shapiro <- shapiro.test(res6) print(chlclog.shapiro) #### ---- -------------------------------------------------------------------------- qqnorm(res6, ylab = "Sample Quantiles for chlclog") qqline(res6, col = "red") #### ---- -------------------------------------------------------------------------- leveneTest(chlc_log ~ as.factor(site), data = data) # p = 0.884 #### ---- -------------------------------------------------------------------------- chlc_log1 <- lmer(chlc_log ~ site + (1|colony/frag), data = data, REML = FALSE, na.action = na.exclude) #AIC 64.52399 ranova(chlc_log1) #### ---- -------------------------------------------------------------------------- chlc_log2 <- lmer(chlc_log ~ site+(1|colony), data = data, REML = FALSE, na.action = na.exclude) ranova(chlc_log2) mod.AIC <- AIC(chlc_log1, chlc_log2) mod.AIC #### ---- -------------------------------------------------------------------------- finalchlc_log <- lmer(chlc_log ~ site + (1|colony), data = data, REML = TRUE, na.action = na.exclude) #### ---- -------------------------------------------------------------------------- finalchlc_logrand <- ranova(finalchlc_log) chlc_log.table <- anova(finalchlc_log, type = 2) #ANOVA table chlc_log.table finalchlc_logrand #### ---- -------------------------------------------------------------------------- all <- read.table("tables/field/coralhypoxia.csv", header = T, sep = ",") attach(all) #### ---- -------------------------------------------------------------------------- means.all <- ddply(all, c("site", "colony"), summarise, N = sum(!is.na(yield)), yield_mean = mean(yield, na.rm = TRUE), zoox_mean = mean(zoox, na.rm = TRUE), chla_mean = mean(chla, na.rm = TRUE), chlc_mean = mean(chlc, na.rm = TRUE), sd_yield = sd(yield), sd_zoox = sd(zoox), sd_chla = sd(chla), sd_chlc = sd(chlc), yield_se = (sd(yield)/sqrt(N)), zoox_se = (sd(zoox)/sqrt(N)), chla_se = (sd(chla)/sqrt(N)), chlc_se = (sd(chlc)/sqrt(N))) write.csv(means.all, file = "tables/field/allcoralmeans.csv", row.names = FALSE) #### ---- -------------------------------------------------------------------------- ggplot(all,aes(x = site, y = yield)) + geom_boxplot() + labs(x = "Site", y='Maximum Quantum Yield') + theme_classic() + theme(text = element_text(size=14)) + theme(axis.text.x = element_text(colour = "black", angle = 0, hjust = 1), axis.text.y = element_text(colour = "black")) + scale_y_continuous(expand = c(0,0), limits = c(0.4,0.75)) #### ---- -------------------------------------------------------------------------- ggplot(all,aes(x=site, y=yield, color=colony)) + geom_point(size=3) + labs(x = "Site", y = 'Maximum Quantum Yield') + theme_classic() + theme(text = element_text(size=14)) + theme(axis.text.x = element_text(colour="black", angle=0, hjust=1), axis.text.y = element_text(colour="black")) + scale_y_continuous(expand = c(0,0), limits = c(0.4,0.75)) #### ---- -------------------------------------------------------------------------- ggplot(means.all,aes(x = site, y = yield_mean,fill = site)) + geom_boxplot() + labs(x = "Site", y = 'Maximum Quantum Yield') + theme_classic() + scale_fill_manual(values = c("steelblue","darkred")) + theme(text = element_text(size = 14)) + theme(axis.text.x = element_text(colour = "black", angle = 0, hjust = 1), axis.text.y = element_text(colour = "black")) + scale_y_continuous(expand = c(0,0), limits = c(0.4,0.75)) #### ---- -------------------------------------------------------------------------- jitter <- position_jitter(width = 0.1) yield_plot <- ggplot(means.all, aes(x = site, y = yield_mean)) #### ---- -------------------------------------------------------------------------- yp <- yield_plot+ geom_boxplot(color="black", aes(fill=site), width=0.4) + scale_fill_manual(values=c("skyblue3","coral3"), name= NULL, labels=c("Normoxic","Hypoxic")) + geom_point(size=2, position=jitter) + labs(x="Site",y="Maximum Quantum Yield") + theme_classic() + theme(legend.position = c(0.85,0.85), legend.title= element_text(color="transparent"), axis.text.x = element_text(colour="black"), axis.text.y = element_text(colour="black"), axis.title.y = element_text(margin = margin(t=0, r=10,b=0,l=0)), panel.border = element_rect(colour = "black", fill=NA, size=.5)) + theme(text = element_text(size=12)) + scale_x_discrete(labels=c("Coral" = "Coral Cay","Roldan" = "Cayo Roldan")) + scale_y_continuous(expand = c(0,0), limits = c(0.5,0.7)) yp #### ---- -------------------------------------------------------------------------- yield_zoox <- ggplot(means.all, aes(x=site, y=zoox_mean)) zp <- yield_zoox+ geom_boxplot(color="black", aes(fill=site), width=0.4) + scale_fill_manual(values=c("skyblue3","coral3"), name= NULL, labels=c("Normoxic","Hypoxic")) + geom_point(size=2, position=jitter) + labs(x="Site", y=("Endosymbiont Density (cells cm^-2)")) + theme_classic() + theme(legend.position = "none", # axis.text.x = element_text(colour="black"), axis.text.y = element_text(colour="black"), axis.title.y = element_text(margin = margin(t=0, r=10,b=0,l=0)), panel.border = element_rect(colour = "black", fill=NA, size=.5)) + theme(text = element_text(size=12)) + scale_x_discrete(labels=c("Coral" = "Coral Cay","Roldan" = "Cayo Roldan")) + scale_y_continuous(labels = scientific, breaks=seq(0,1.5E6, 4.5E5), expand = c(0,0), limits = c(0,1.5E6)) zp #### ---- -------------------------------------------------------------------------- pigs <- tidyr::gather(means.all, pigment, concentration,chla_mean, chlc_mean) pigments <- dplyr::select(pigs,site,colony,pigment,concentration) #### ---- -------------------------------------------------------------------------- pig_plot <- ggplot(pigments, aes(x=pigment, y=concentration, fill=site)) #### ---- -------------------------------------------------------------------------- pig_plot+ geom_boxplot(color="black") + scale_fill_manual(values=c("skyblue3","coral3"), name= NULL, labels=c("Normoxic","Hypoxic")) #### ---- -------------------------------------------------------------------------- jitter2 <- position_jitter(width=0.05) chla_plot <- ggplot(means.all, aes(x=site, y=chla_mean)) #### ---- -------------------------------------------------------------------------- cp <- chla_plot+ geom_boxplot(color="black", aes(fill=site), width=0.4) + scale_fill_manual(values=c("skyblue3","coral3"), name= NULL, labels=c("Normoxic","Hypoxic")) + geom_point(size=2, position=jitter) + labs(x="Site", y=("Chl a Concentration (ug cm^-2)")) + theme_classic() + theme(legend.position = "none", axis.text.x = element_text(colour="black"), axis.text.y = element_text(colour="black"), axis.title.y = element_text(margin = margin(t=0, r=10,b=0,l=0)), panel.border = element_rect(colour = "black", fill=NA, size=.5)) + theme(text = element_text(size=12)) + scale_x_discrete(labels=c("Coral" = "Coral Cay","Roldan" = "Cayo Roldan")) + scale_y_continuous(expand = c(0,0), limits = c(0,6)) cp #### ---- echo=FALSE, fig.height=4, layout='l-body-outset', eval=FALSE------------- bar_plots <- yp + zp + cp bar_plots + plot_annotation(tag_levels = 'A', title = 'Heatmaps of taxa distribution', #subtitle = 'Top 30 taxa of non-rarefied data', caption = 'A) DETAILS, B) DETAILS, C) DETAILS' ) #### ---- echo=FALSE, eval=FALSE--------------------------------------------------- plot_grid(yp, cp,zp, labels="AUTO", ncol=3, align="v") #### ---- -------------------------------------------------------------------------- ypp <- yield_plot + geom_boxplot(color = "black", aes(fill = site), width = 0.4) + scale_fill_manual(values = c("skyblue3", "coral3"), name = NULL, labels = c("Normoxic", "Hypoxic")) + geom_point(size = 1, position = jitter) + labs(x = "Site", y = ("Maximum Quantum Yield")) + theme_classic() + theme(legend.position = "top", legend.direction = "vertical", # legend.title= element_text(color = "transparent"), axis.text.x = element_text(size = 7, colour="black"), axis.text.y = element_text(size = 8, colour = "black"), axis.title.x = element_blank(), axis.title.y = element_text(size = 9, margin = margin(t = 0, r = 10, b = 0, l = 0)), panel.border = element_rect(colour = "black", fill = NA, size = 0.5)) + theme(text = element_text(size = 12)) + scale_x_discrete(labels=c("Coral" = "Coral Cay", "Roldan" = "Cayo Roldan")) + scale_y_continuous(expand = c(0,0), limits = c(0.5,0.7)) #### ---- -------------------------------------------------------------------------- yzield_zoox <- ggplot(means.all, aes(x = site, y = zoox_mean)) zpp <- yield_zoox+ geom_boxplot(color="black", aes(fill=site), width=0.4) + scale_fill_manual(values=c("skyblue3","coral3"), name= NULL, labels=c("Normoxic","Hypoxic")) + geom_point(size=1, position=jitter) + labs(x = "Site", y=("Endosymbiont Density (cells cm^-2)")) + theme_classic() + theme(legend.position = "none", axis.text.x = element_text(size = 7, colour="black"), axis.title.x = element_text(size = 12), axis.text.y = element_text(size = 8, colour="black"), axis.title.y = element_text(size = 9, margin = margin(t=0, r=10,b=0,l=0)), panel.border = element_rect(colour = "black", fill=NA, size=.5)) + theme(text = element_text(size=12)) + scale_x_discrete(labels=c("Coral" = "Coral Cay","Roldan" = "Cayo Roldan")) + scale_y_continuous(labels = scientific, breaks=seq(0,1.5E6, 4.5E5), expand = c(0,0), limits = c(0,1.5E6)) #### ---- -------------------------------------------------------------------------- chla_plot <- ggplot(means.all, aes(x = site, y = chla_mean)) cpp <- chla_plot + geom_boxplot(color="black", aes(fill=site), width=0.4) + scale_fill_manual(values = c("skyblue3","coral3"), name = NULL, labels = c("Normoxic", "Hypoxic")) + geom_point(size=1, position=jitter) + labs(x = "Site", y = ("Chl a Concentration (ug cm^-2)")) + theme_classic() + theme(legend.position = "none", axis.text.x = element_text(size = 7, colour="black"), axis.text.y = element_text(size = 8, colour="black"), axis.title.y = element_text(size = 9, margin = margin(t = 0, r = 10, b = 0, l = 0)), axis.title.x = element_blank(), panel.border = element_rect(colour = "black", fill=NA, size=.5)) + theme(text = element_text(size=12)) + scale_x_discrete(labels=c("Coral" = "Coral Cay","Roldan" = "Cayo Roldan")) + scale_y_continuous(expand = c(0,0), limits = c(0,6)) #### ---- echo=FALSE, fig.height=5, layout='l-body-outset'------------------------- bar_plots_ft <- ypp + zpp + cpp bar_plots_ft + plot_annotation(tag_levels = 'A', title = 'Coral response variables ', #subtitle = 'Top 30 taxa of non-rarefied data', caption = 'A) Pulse Amplitude Modulated (PAM), B) Zooxanthellae, C) Chlorophyll a (Chla)' ) #### ---- eval=FALSE, echo=FALSE--------------------------------------------------- panel <- plot_grid(ypp, zpp,cpp, labels="AUTO", ncol=1, align="v") panel ########################################################## ## 16s rRNA Workflow ########################################################## ############################# ## ## No 1. DADA2 Workflow ## ############################# #### ---- setup, include=FALSE------------------------------------------------------ knitr::opts_chunk$set(echo = TRUE) set.seed(911) library(dada2); packageVersion("dada2") library(ShortRead); packageVersion("ShortRead") library(ggplot2); packageVersion("ggplot2") library(rmarkdown) library(DT) library(gridExtra) library(grid) knitr::opts_current$get(c( "cache", "cache.path", "cache.rebuild", "dependson", "autodep" )) #### ---- sample_seq_table, echo=FALSE, layout="l-body-outset", eval=FALSE---------- seq_table <- read.table("tables/16s-dada2/sample-seq-info-2019-16s.txt", header = TRUE, sep = "\t") datatable(seq_table, width = "100%", extensions = 'Buttons', options = list( scrollX = TRUE, dom = 'lfrtipB', buttons = c('copy', 'csv', 'excel'), pageLength = 8 ) ) cutadapt -g {F-ADAPTER} -G {R-ADAPTER} / -o ${R1}.trimmed.fastq / -p {R2}.trimmed.fastq / ${R1} ${R2} / --discard-untrimmed -e 0.12 #### ---- list_fastq_filesX, eval=FALSE--------------------------------------------- path <- "INPUT_FILES/" head(list.files(path)) #### ---- dummy_1X, echo=FALSE------------------------------------------------------ c("WCC0_R1.trimmed.fastq", "WCC0_R2.trimmed.fastq", "WCC1_R1.trimmed.fastq", "WCC1_R2.trimmed.fastq", "WCC2_R1.trimmed.fastq", "WCC2_R2.trimmed.fastq") #### ---- sort_files, eval=FALSE---------------------------------------------------- fnFs <- sort(list.files(path, pattern = "_R1.trimmed.fastq")) fnRs <- sort(list.files(path, pattern = "_R2.trimmed.fastq")) #### ---- split_nameX, eval=FALSE--------------------------------------------------- sample.names <- sapply(strsplit(fnFs, "_"), `[`, 1) fnFs <-file.path(path, fnFs) fnRs <-file.path(path, fnRs) #### ---- plot_qscoresX, layout="l-body-outset", warning=FALSE, fig.height=3, eval=FALSE---- p1 <- plotQualityProfile(fnFs[1:8], aggregate = TRUE) p2 <- plotQualityProfile(fnRs[1:8], aggregate = TRUE) p3 <- grid.arrange(p1, p2, nrow = 1) ggsave("plot_qscores_compare.png", p3, width = 7, height = 3) #### ---- dummy_2X, echo=FALSE, layout="l-body-outset", warning=FALSE, fig.height=2---- knitr::include_graphics("figures/16s-dada2/plot_qscores_compare.png") #### ---- move_files, eval=FALSE---------------------------------------------------- filt_path <- file.path(path, "filtered") filtFs <- file.path(filt_path, paste0(sample.names, "_F_filt.fastq.gz")) filtRs <- file.path(filt_path, paste0(sample.names, "_R_filt.fastq.gz")) #### ---- filterX, eval=FALSE------------------------------------------------------- out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, truncLen=c(260,150), maxN=0, maxEE=c(3,3), truncQ=2, rm.phix=TRUE, compress=TRUE, multithread = 20) #### ---- filter_table, echo=FALSE, eval=FALSE-------------------------------------- datatable(out, width = "80%", extensions = 'Buttons', options = list( scrollX = TRUE, dom = 'Brti', buttons = c('copy', 'csv', 'excel'), pageLength = 8 ) ) #### ---- dummy_4X, echo=FALSE------------------------------------------------------ out_T <- read.table("tables/16s-dada2/out_T.txt", header = TRUE) datatable(out_T, width = "80%", extensions = 'Buttons', options = list( scrollX = TRUE, dom = 'Brti', buttons = c('copy', 'csv', 'excel'), pageLength = 8 ) ) #### ---- learn_errors_forwardX, eval=FALSE----------------------------------------- errF <- learnErrors(filtFs, multithread = 20) #### ---- learn_errors_reverseX, eval=FALSE----------------------------------------- errR <- learnErrors(filtRs, multithread = 20) #### ---- plot_errorFX, warning=FALSE, layout="l-body-outset", eval=FALSE----------- plotErrors(errF, nominalQ = TRUE) #### ---- echo=FALSE, eval=FALSE--------------------------------------------------- p4 <- plotErrors(errF, nominalQ = TRUE) ggsave("plot_errorF_1.png", p4, width = 7, height = 5) ggsave("plot_errorF_2.png", p4) #### ---- dummy_6X, echo=FALSE, layout="l-body-outset"------------------------------ knitr::include_graphics("figures/16s-dada2/plot_errorF_1.png") #### ---- plot_errorRX, warning=FALSE, layout="l-body-outset", eval=FALSE----------- plotErrors(errR, nominalQ=TRUE) #### ---- echo=FALSE, eval=FALSE--------------------------------------------------- p5 <- plotErrors(errR, nominalQ = TRUE) ggsave("plot_errorR_1.png", p5, width = 7, height = 5) ggsave("plot_errorR_2.png", p5) #### ---- dummy_7RX, echo=FALSE, layout="l-body-outset"----------------------------- knitr::include_graphics("figures/16s-dada2/plot_errorR_1.png") #### ---- echo=FALSE--------------------------------------------------------------- TO run and cache the derepFastq you must set `cache.lazy=FALSE` For both code blocks #### ---- dereplicate_reads_F, eval=FALSE------------------------------------------- derepFs <- derepFastq(filtFs) names(derepFs) <- sample.names #### ---- dereplicate_reads_R, eval=FALSE------------------------------------------- derepRs <- derepFastq(filtRs) names(derepRs) <- sample.names #### ---- run_dada2_forwardX1, eval=FALSE------------------------------------------- dadaFs <- dada(derepFs, err = errF, multithread = 20) #### ---- run_dada2_reverseX1, eval=FALSE------------------------------------------- dadaRs <- dada(derepRs, err = errR, multithread = TRUE) #### ---- inspect_f, eval=FALSE----------------------------------------------------- dadaFs[[1]] #### ---- inspect_r, eval=FALSE----------------------------------------------------- dadaRs[[1]] #### ---- merge_paired_reads, eval=FALSE-------------------------------------------- mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs) #### ---- head_file, eval = FALSE, echo = FALSE------------------------------------- head(mergers[[1]]) #### ---- seq_tableX, eval=FALSE---------------------------------------------------- seqtab <- makeSequenceTable(mergers) dim(seqtab) #### ---- echo=FALSE---------------------------------------------------------------- c(8, 4132) #### ---- seq_table2X, eval=FALSE--------------------------------------------------- table(nchar(getSequences(seqtab))) #### ---- trim_lengthX, eval=FALSE-------------------------------------------------- seqtab.2 <- seqtab[,nchar(colnames(seqtab)) %in% seq(360,380)] dim(seqtab.2) #### ---- dummy_14X, echo=FALSE----------------------------------------------------- c(8, 4119) #### ---- trim_length2X, eval=FALSE------------------------------------------------ table(nchar(getSequences(seqtab.2))) #### ---- chimera_on_ind_runs, eval=FALSE------------------------------------------- seqtab.2.nochim <- removeBimeraDenovo(seqtab.2, method="consensus", multithread = 20) dim(seqtab.2.nochim) #### ---- dummy_16, echo=FALSE------------------------------------------------------ c(8, 1424) #### ---- chimera_on_ind_runs2, eval=FALSE------------------------------------------ sum(seqtab.2.nochim)/sum(seqtab.2) #### ---- dummy_17, echo=FALSE------------------------------------------------------ 0.874367 #### ---- build_table_to_track_reads, eval=FALSE------------------------------------ getN <- function(x) sum(getUniques(x)) track <- cbind(out, sapply(dadaFs, getN), sapply(dadaRs, getN), sapply(mergers, getN), rowSums(seqtab.2.nochim)) colnames(track) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nonchim") rownames(track) <- sample.names #### ---- dummy_44X, echo=FALSE, layout="l-body-outset"----------------------------- sum_run <- read.table("tables/16s-dada2/read_changes.txt", header = TRUE) datatable(sum_run, width = "80%", extensions = 'Buttons', options = list( scrollX = TRUE, dom = 'Brti', buttons = c('copy', 'csv', 'excel'), pageLength = 8 ) ) #### ---- track_changes, eval=FALSE, echo=FALSE------------------------------------- write.table(track, "tables/16s-dada2/read_changes.txt", sep = "\t", quote = FALSE, col.names=NA) #### ---- saveRDS, eval=FALSE------------------------------------------------------- saveRDS(seqtab.2.nochim, "seqtab.2.nochim.rds") #### ---- read_combo, eval=FALSE---------------------------------------------------- remove(list = ls()) seqtab <- readRDS("seqtab.2.nochim.rds") #### ---- assign_tax_silva, eval = FALSE-------------------------------------------- tax_silva <- assignTaxonomy( seqtab, "../taxonomy_databases/silva_nr_v138_train_set.fa.gz", multithread = TRUE) #### ---- save_image, eval=FALSE---------------------------------------------------- save.image("rdata/16s-dada2/water_pipeline.rdata") ############################# ## ## No 2. Data Preparation ## ############################# #### ---- setup, include=FALSE------------------------------------------------------ knitr::opts_chunk$set(echo = TRUE) set.seed(0199) library(phyloseq); packageVersion("phyloseq") library(DT) library(ggplot2) library(Biostrings); packageVersion("Biostrings") library(dplyr) library(microbiome) library(tidyverse) options(scipen=999) knitr::opts_current$get(c( "cache", "cache.path", "cache.rebuild", "dependson", "autodep" )) #### ---- echo=FALSE--------------------------------------------------------------- ### NOTE: this should only be run AFTER the workflow is finished. ### This image is created at the END of this workflow and now read back in. ### Basically, this locks everything in place so nothing changes. Only code ### chunkc to display figures and tables are evaluated. Everything else is ### turned off. To actually run this you need to set this chunk to ### eval=FALSE and the rest to eval=TRUE load("rdata/16s-data-prep/16s-data_prep.rdata") #### ---- sample_table1, echo=FALSE, eval=FALSE------------------------------------- joined_tab <- read.table( "tables/16s-data-prep/read_changes.txt", header = TRUE, sep = "\t") joined_tab_percent <- joined_tab percent_change <- 1-(joined_tab$nonchim/joined_tab$input) %>% round(digits = 3) joined_tab_percent$Change <- as.numeric(sprintf("%.3f", percent_change)) colnames(joined_tab_percent) <- c("Sample
ID", "Type", "Site", "input", "filter", "denoiseF", "denoiseR", "merged", "nochim", "Change") #### ---- samp-summary1, echo=FALSE, layout="l-page"-------------------------------- NOTE. For some reason this datatable was being given the same elementId as the first table. So I had to add a 20 character elementId https://www.random.org/strings/ datatable(joined_tab_percent, width = "100%", escape = FALSE, rownames = FALSE, filter = 'top', caption = htmltools::tags$caption( style = 'caption-side: bottom; text-align: left;', 'Table: ', htmltools::em('Tracking read changes through DADA2 workflow. Use the buttons to navigate through the table or download a copy. Table may scroll right. ')), elementId = "vwltiu9qj77sfy3a07we", extensions = 'Buttons', options = list( scrollX = TRUE, dom = 'Brti', buttons = c('copy', 'csv', 'excel'), pageLength = 8 ) ) %>% DT::formatStyle(columns = colnames(joined_tab_percent), fontSize = '80%') cutoff <- 2000 remove_sam <- joined_tab[joined_tab$nonchim <= cutoff, ] #### ---- deliniate_sample_types, eval=FALSE---------------------------------------- load("rdata/16s-dada2/water_pipeline.rdata") samples.out <- rownames(seqtab) subject <- sapply(strsplit(samples.out, "[[:digit:]]"), `[`, 1) # this splits the string at first instance of a digit sample_name <- substr(samples.out, 1, 999) # use the whole string for individuals type <- substr(samples.out, 0, 1) # use the first two letters for sample typle site <- substr(samples.out, 2, 3) # use the next three letters for site num_samp <- length(unique(sample_name)) num_type <- length(unique(type)) num_sites <- length(unique(site)) #### ---- define_variables---------------------------------------------------------- #define a sample data frame samdf <- data.frame(SamName = sample_name, TYPE = type, SITE = site) rownames(samdf) <- samples.out #### ---- create_ps_object, eval=FALSE---------------------------------------------- # this create the phyloseq object ps <- phyloseq(otu_table(seqtab, taxa_are_rows = FALSE), sample_data(samdf), tax_table(tax_silva)) tax_table(ps) <- cbind(tax_table(ps), rownames(tax_table(ps))) # adding unique ASV names taxa_names(ps) <- paste0("ASV", seq(ntaxa(ps))) tax_table(ps) <- cbind(tax_table(ps), rownames(tax_table(ps))) head(taxa_names(ps)) #### ---- echo=FALSE--------------------------------------------------------------- head(taxa_names(ps)) #### ---- calculate_stats_ps, echo=FALSE, eval=FALSE-------------------------------- min_read_ps <- min(readcount(ps)) max_read_ps <- max(readcount(ps)) total_reads_ps <- sum(readcount(ps)) mean_reads_ps <- round(mean(readcount(ps)), digits = 0) median_reads_ps <- median(readcount(ps)) total_asvs_ps <- ntaxa(ps) singleton_ps <- tryCatch(ntaxa(rare(ps, detection = 1, prevalence = 0)), error=function(err) NA) singleton_ps_perc <- tryCatch(round((100*(ntaxa(rare(ps, detection = 1, prevalence = 0)) / ntaxa(ps))), digits = 3), error=function(err) NA) sparsity_ps <- round(length(which(abundances(ps) == 0))/length(abundances(ps)), digits = 3) #### ---- add_ASV_coulmn, eval=FALSE------------------------------------------------ colnames(tax_table(ps)) <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "ASV_SEQ", "ASV_ID") ps #### ---- echo=FALSE--------------------------------------------------------------- ps #### ---- export_seq_tax_tables, eval=FALSE----------------------------------------- write.table(tax_table(ps), "tables/16s-data-prep/full_tax_table.txt", sep="\t", quote = FALSE, col.names=NA) write.table(t(otu_table(ps)), "tables/16s-data-prep/full_seq_table.txt", sep="\t", quote = FALSE, col.names=NA) #### ---- export_fasta_full, eval=FALSE--------------------------------------------- # Create fasta file from tax_table table2format <- tax_table(ps) #retain only the column with the sequences table2format_trim <- table2format[, 7] table2format_trim_df <- data.frame(row.names(table2format_trim), table2format_trim) colnames(table2format_trim_df) <- c("ASV_ID", "ASV_SEQ") #format fasta table2format_trim_df$ASV_ID <- sub("ASV", ">ASV", table2format_trim_df$ASV_ID) write.table(table2format_trim_df, "tables/16s-data-prep/full_asv.fasta", sep = "\r", col.names = FALSE, row.names = FALSE, quote = FALSE, fileEncoding = "UTF-8") #### ---- remove_specific_taxa, eval=FALSE------------------------------------------ # generate a file with mitochondria ASVs MT1 <- subset_taxa(ps, Family == "Mitochondria") MT1 <- as(tax_table(MT1), "matrix") MT1 <- MT1[, 8] MT1df <- as.factor(MT1) goodTaxa <- setdiff(taxa_names(ps), MT1df) ps_no_mito <- prune_taxa(goodTaxa, ps) ps_no_mito #### ---- echo=FALSE--------------------------------------------------------------- ps_no_mito #### ---- remove_specific_taxa2, eval=FALSE----------------------------------------- # generate a file with mitochondria ASVs CH1 <- subset_taxa(ps_no_mito, Order == "Chloroplast") CH1 <- as(tax_table(CH1), "matrix") CH1 <- CH1[, 8] CH1df <- as.factor(CH1) goodTaxa <- setdiff(taxa_names(ps_no_mito), CH1df) ps_no_chloro <- prune_taxa(goodTaxa, ps_no_mito) ps_no_chloro #### ---- echo=FALSE--------------------------------------------------------------- ps_no_chloro #### ---- remove_specific_taxa3, eval=FALSE, eval=FALSE----------------------------- # generate a file with mitochondria ASVs EU1 <- subset_taxa(ps_no_chloro, Kingdom == "Eukaryota") EU1 <- as(tax_table(EU1), "matrix") EU1 <- EU1[, 8] EU1df <- as.factor(EU1) goodTaxa <- setdiff(taxa_names(ps_no_chloro), EU1df) ps_no_euk <- prune_taxa(goodTaxa, ps_no_chloro) ps_no_euk #### ---- remove_kingdom_na, eval=FALSE--------------------------------------------- ps_filt <- subset_taxa(ps_no_chloro, !is.na(Kingdom)) ps_filt #### ---- echo=FALSE--------------------------------------------------------------- ps_filt #### ---- save_trim_table, eval=FALSE----------------------------------------------- write.table(tax_table(ps_filt), "tables/16s-data-prep/trim_tax_table.txt", sep="\t", quote = FALSE, col.names=NA) write.table(t(otu_table(ps_filt)), "tables/16s-data-prep/trim_seq_table.txt", sep="\t", quote = FALSE, col.names=NA) write.table(sample_data(ps_filt), "tables/16s-data-prep/sample_table.txt", sep="\t", quote = FALSE, col.names=NA) #### ---- export_fasta_water, eval=FALSE-------------------------------------------- # Create fasta file from tax_table table2format <- tax_table(ps_filt) #retain only the column with the sequences table2format_trim <- table2format[, 7] table2format_trim_df <- data.frame(row.names(table2format_trim), table2format_trim) colnames(table2format_trim_df) <- c("ASV_ID", "ASV_SEQ") #format fasta table2format_trim_df$ASV_ID <- sub("ASV", ">ASV", table2format_trim_df$ASV_ID) write.table(table2format_trim_df, "tables/16s-data-prep/trim_asv.fasta", sep = "\r", col.names = FALSE, row.names = FALSE, quote = FALSE, fileEncoding = "UTF-8") #### ---- select_water_samples, echo=FALSE, eval=FALSE------------------------------ ############################################### ## This is legacy code for making a ps object## ## for a subset of samples ## ############################################### ## Water Samples Phyloseq Object #The next thing to do is select only water samples and make a separate phyloseq object---since our paper is about the water samples. To do this we must select the samples we *wish to keep*. If you want to change the group of samples, modify the script accordingly. ps_water <- prune_samples( c("WCC0", "WCC1", "WCC2", "WCC3", "WCR0", "WCR1", "WCR2", "WCR3"), ps_filt) ps_water #Anytime we remove samples we probably lose some ASVs. So we need to get rid of any ASVs that have a total of **0 reads**. ps_water <- prune_taxa(taxa_sums(ps_water) > 0, ps_water) ps_water #So for water samples only there are `r ntaxa(ps_water)` ASVs and `r nsamples(ps_water)` samples. Now, save the various phyloseq data from the water object. #### ---- save_data_prep_image, echo=TRUE, eval=FALSE------------------------------- ps_water <- ps_filt save.image("rdata/16s-data-prep/16s-data_prep.rdata") saveRDS(otu_table(ps_water), "rdata/16s-data-prep/ps_water-seqtab.rds") saveRDS(sample_data(ps_water), "rdata/16s-data-prep/ps_water-sample.rds") saveRDS(tax_table(ps_water), "rdata/16s-data-prep/ps_water-taxtab.rds") ############################# ## ## No 3. Diversity ## ############################# #### ---- setup, include=FALSE------------------------------------------------------ knitr::opts_chunk$set(echo = TRUE) set.seed(0199) library(phyloseq); packageVersion("phyloseq") library(DT) library(ggplot2) library(Biostrings); packageVersion("Biostrings") library(dplyr) library(microbiome) library(tidyverse) library(data.table) library(plyr) library(stringr) require(gdata) library(labdsv) library(reshape) options(scipen=999) knitr::opts_current$get(c( "cache", "cache.path", "cache.rebuild", "dependson", "autodep" )) #### ---- load_water_2, echo=FALSE-------------------------------------------------- ### NOTE: this should only be run AFTER the workflow is finished. ### This image is created at the END of this workflow and now read back in. ### Basically, this locks everything in place so nothing changes. Only code ### chunk to display figures and tables are evaluated. Everything else is ### turned off. To actually run this you need to set this chunk to ### eval=FALSE and the rest to eval=TRUE load("rdata/16s-water/16s-water.rdata") #### ---- load_water_1, eval=FALSE-------------------------------------------------- remove(list = ls()) sample_d <- readRDS("rdata/16s-data-prep/ps_water-sample.rds") seqtab <- readRDS("rdata/16s-data-prep/ps_water-seqtab.rds") taxtab <- readRDS("rdata/16s-data-prep/ps_water-taxtab.rds") ps_water <- merge_phyloseq(sample_d, seqtab, taxtab) ps_water_o <- ps_water ps_water #### ---- echo=FALSE--------------------------------------------------------------- ps_water #### ---- save_water_table, eval=FALSE---------------------------------------------- write.table(tax_table(ps_water), "tables/16s-water/water_tax_table.txt", sep="\t", quote = FALSE, col.names=NA) write.table(t(otu_table(ps_water)), "tables/16s-water/water_seq_table.txt", sep="\t", quote = FALSE, col.names=NA) #### ---- export_fasta_water, eval=FALSE-------------------------------------------- # Create fasta file from tax_table table2format <- tax_table(ps_water) #retain only the column with the sequences table2format_trim <- table2format[, 7] table2format_trim_df <- data.frame(row.names(table2format_trim), table2format_trim) colnames(table2format_trim_df) <- c("ASV_ID", "ASV_SEQ") #format fasta table2format_trim_df$ASV_ID <- sub("ASV", ">ASV", table2format_trim_df$ASV_ID) write.table(table2format_trim_df, "tables/16s-water/water_asv.fasta", sep = "\r", col.names = FALSE, row.names = FALSE, quote = FALSE, fileEncoding = "UTF-8") #### ---- add_var_cats, eval=FALSE-------------------------------------------------- sample_data(ps_water)$oxstate <- c("normoxic", "normoxic", "normoxic", "normoxic", "normoxic", "hypoxic", "hypoxic", "hypoxic") sample_data(ps_water)$period <- c("cc_after", "cc_during", "cc_during", "cc_during", "cr_after", "cr_during", "cr_during", "cr_during") #### ---- calculate_stats_ps_water, echo=FALSE, eval=FALSE-------------------------- ### Reads min_read_ps_water <- min(readcount(ps_water)) max_read_ps_water <- max(readcount(ps_water)) total_reads_ps_water <- sum(readcount(ps_water)) mean_reads_ps_water <- round(mean(readcount(ps_water)), digits = 0) median_reads_ps_water <- median(readcount(ps_water)) ### ASVs total_asvs_ps_water <- ntaxa(ps_water) min_asvs_ps_water <- min(estimate_richness( ps_water, measures = "Observed")) max_asvs_ps_water <- max(estimate_richness( ps_water, measures = "Observed")) mean_asvs_ps_water <- formatC(mean( estimate_richness(ps_water, measures = "Observed")$Observed), digits = 0, format = 'f') median_asvs_ps_water <- median( estimate_richness(ps_water, measures = "Observed")$Observed) singleton_ps_water <- tryCatch(ntaxa(rare(ps_water, detection = 1, prevalence = 0)), error=function(err) NA) singleton_ps_water_perc <- tryCatch(round((100*(ntaxa(rare(ps_water, detection = 1, prevalence = 0)) / ntaxa(ps_water))), digits = 3), error=function(err) NA) sparsity_ps_water <- round(length(which(abundances(ps_water) == 0))/length(abundances(ps_water)), digits = 3) #### ---- sample_summary_table, eval=FALSE------------------------------------------ total_reads <- sample_sums(ps_water) total_reads <- as.data.frame(total_reads, make.names = TRUE) total_reads <- total_reads %>% rownames_to_column("Sample_ID") total_asvs <- estimate_richness(ps_water, measures = c( "Observed", "Shannon", "InvSimpson")) total_asvs <- total_asvs %>% rownames_to_column("Sample_ID") total_asvs$Sample_ID <- gsub('\\.', '-', total_asvs$Sample_ID) #### ---- sample_summary_table2, eval=FALSE----------------------------------------- sam_details <- meta(sample_data(ps_water)) rownames(sam_details) <- NULL colnames(sam_details) <- c("Sample_ID", "Type", "Site", "Oxstate", "Period") merge_tab <- merge(sam_details, total_reads, by = "Sample_ID") merge_tab2 <- merge(merge_tab, total_asvs, by = "Sample_ID") colnames(merge_tab2) <- c("Sample
ID", "Type", "Site", "Oxstate", "Period", "total
reads", "total
ASVs", "Shannon", "InvSimpson") #### ---- sample_summary_table_display, layout="l-body-outset", echo=FALSE---------- datatable(merge_tab2, width = "100%", escape = FALSE, rownames = FALSE, caption = htmltools::tags$caption( style = 'caption-side: bottom; text-align: left;', 'Table: ', htmltools::em('Total reads & ASVs by sample.')), elementId = "mex738zeiysgiy3fysb8", extensions = 'Buttons', options = list( scrollX = TRUE, dom = 'Brti', buttons = c('copy', 'csv', 'excel'), pageLength = 8, lengthMenu = c(5, 10) ) ) %>% formatRound(columns=c("Shannon", "InvSimpson"), digits=2) %>% formatStyle(columns = colnames(merge_tab2), fontSize = '80%') #### ---- tax_clean, echo=FALSE, eval=FALSE----------------------------------------- tax.clean <- data.frame(tax_table(ps_water)) for (i in 1:6){ tax.clean[,i] <- as.character(tax.clean[,i])} tax.clean[is.na(tax.clean)] <- "" for (i in 1:nrow(tax.clean)){ if (tax.clean[i,2] == ""){ kingdom <- paste("Kingdom_", tax.clean[i,1], sep = "") tax.clean[i, 2:6] <- kingdom } else if (tax.clean[i,3] == ""){ phylum <- paste("Phylum_", tax.clean[i,2], sep = "") tax.clean[i, 3:6] <- phylum } else if (tax.clean[i,4] == ""){ class <- paste("Class_", tax.clean[i,3], sep = "") tax.clean[i, 4:6] <- class } else if (tax.clean[i,5] == ""){ order <- paste("Order_", tax.clean[i,4], sep = "") tax.clean[i, 5:6] <- order } else if (tax.clean[i,6] == ""){ tax.clean$Genus[i] <- paste("Family",tax.clean$Family[i], sep = "_") } } tax_table(ps_water) <- as.matrix(tax.clean) rank_names(ps_water) rm(class, order, phylum, kingdom) #### ---- echo=FALSE--------------------------------------------------------------- rank_names(ps_water) #### ---- echo=FALSE, eval=FALSE--------------------------------------------------- write.table(tax_table(ps_water), "tables/16s-water/water_tax_table-no-na-names.txt", sep="\t", quote = FALSE, col.names=NA) write.table(t(otu_table(ps_water)), "tables/16s-water/water_seq_table-no-na-names.txt", sep="\t", quote = FALSE, col.names=NA) #### ---- assign_rank, eval=FALSE--------------------------------------------------- TRANK <- "Class" #### ---- diversity_table_w1, eval=FALSE-------------------------------------------- tax_asv <- table(tax_table(ps_water)[, TRANK], exclude = NULL, dnn = "Taxa") tax_asv <- as.data.frame(tax_asv, make.names = TRUE, stringsAsFactors=FALSE) tax_reads <- factor(tax_table(ps_water)[, TRANK]) tax_reads <- apply(otu_table(ps_water), MARGIN = 1, function(x) { tapply(x, INDEX = tax_reads, FUN = sum, na.rm = TRUE, simplify = TRUE) }) tax_reads <- as.data.frame(tax_reads, make.names = TRUE) tax_reads <- cbind(tax_reads, reads = rowSums(tax_reads)) tax_reads <- tax_reads[9] tax_reads <- setDT(tax_reads, keep.rownames = TRUE)[] #### ---- diversity_table_w2, eval=FALSE-------------------------------------------- taxa_read_asv_tab <- merge(tax_reads, tax_asv, by.x = "rn", by.y = "Taxa") top_reads <- top_n(taxa_read_asv_tab, n = 8, wt = reads) top_asvs <- top_n(taxa_read_asv_tab, n = 8, wt = Freq) names(taxa_read_asv_tab) <- c("Taxa", "total reads", "total ASVs") #### ---- diversity_table_w3, echo=FALSE-------------------------------------------- datatable( taxa_read_asv_tab, rownames = FALSE, width = "100%", colnames = c("Taxa", "total reads", "total ASVs"), caption = htmltools::tags$caption( style = "caption-side: bottom; text-align: left;", "Total reads & ASVs by taxa" ), elementId = "9sgooxcmkzw565dtk7or", extensions = "Buttons", options = list( columnDefs = list(list(className = "dt-left", targets = 0)), dom = "Blfrtip", buttons = c("csv", "copy"), scrollX = TRUE, scrollCollapse = TRUE, scrollY=TRUE, pageLength = 5, scroller=TRUE, lengthMenu = c(5, 10, 25, 60))) top_reads2 <- tibble::column_to_rownames(top_reads, var = "rn") top_reads2 <- top_reads2[with(top_reads2, order(-reads)),] top_asvs2 <- tibble::column_to_rownames(top_asvs, var = "rn") top_asvs2 <- top_asvs2[with(top_asvs2, order(-Freq)),] #### ---- assign_rank2, eval=FALSE-------------------------------------------------- TRANK <- "Class" #### ---- calc_avg2, warning=FALSE, eval=FALSE-------------------------------------- ps_water_AVG <- transform_sample_counts(ps_water, function(x) x/sum(x)) ps_water_ox <- merge_samples(ps_water, "oxstate") SD_BAR_w <- merge_samples(sample_data(ps_water_AVG), "oxstate") #### ---- calc_avg3, warning=FALSE, eval=FALSE-------------------------------------- mdata_phy_w <- tax_glom(ps_water_ox, taxrank = TRANK, NArm = FALSE) mdata_phyrel_w <- transform_sample_counts(mdata_phy_w, function(x) x/sum(x)) meltd_w <- psmelt(mdata_phyrel_w) meltd_w[[TRANK]] <- as.character(meltd_w[[TRANK]]) #### ---- calc_avg4, warning=FALSE, eval=FALSE-------------------------------------- means_w <- ddply(meltd_w, ~get(TRANK), function(x) c(mean = mean(x$Abundance))) colnames(means_w) <- c(TRANK, "mean") means_w$mean <- round(means_w$mean, digits = 5) taxa_means_w <- means_w[order(-means_w$mean), ] # this orders in decending fashion taxa_means_w <- format(taxa_means_w, scientific = FALSE) # ditch the sci notation #### ---- calc_avg5, warning=FALSE, echo=FALSE-------------------------------------- datatable( taxa_means_w, rownames = FALSE, width = "100%", colnames = c(TRANK, "mean"), caption = htmltools::tags$caption( style = "caption-side: bottom; text-align: left;", "Relative abundance by taxonomic rank. "), elementId = "t9xg4oo3fbjqjqu1ogjm", extensions = "Buttons", options = list(columnDefs = list(list( className = "dt-left", targets = "_all")), dom = "Blfrtip", buttons = c("csv", "copy"), scrollX = TRUE, scrollCollapse = TRUE, scrollY=TRUE, pageLength = 10, scroller=TRUE, lengthMenu = c(10, 25, 60))) #### ---- calc_avg6, warning=FALSE, eval=FALSE-------------------------------------- TAXAN <- 8 top_perc_w <- top_n(taxa_means_w, n = TAXAN, wt = mean) top_perc_w$mean <- round(as.numeric(top_perc_w$mean), digits = 5) min_top_perc_w <- round(as.numeric(min(top_perc_w$mean)), digits = 5) top_perc_w_list <- top_perc_w[,1] #### ---- make_other_n_v_h, eval=FALSE---------------------------------------------- Other_w <- means_w[means_w$mean < min_top_perc_w, ][[TRANK]] meltd_w[meltd_w[[TRANK]] %in% Other_w, ][[TRANK]] <- "Other" samp_names_w <- aggregate(meltd_w$Abundance, by = list(meltd_w$Sample), FUN = sum)[, 1] .e_w <- environment() meltd_w[, TRANK] <- factor(meltd_w[, TRANK], sort(unique(meltd_w[, TRANK]))) meltd_w1 <- meltd_w[order(meltd_w[, TRANK]), ] target <- c("Cyanobacteriia", "Alphaproteobacteria", "Bacteroidia", "Gammaproteobacteria", "Thermoplasmata", "Phylum_Marinimicrobia_(SAR406_clade)", "Acidimicrobiia", "Campylobacteria", "Other") meltd_w1[[TRANK]] <- reorder.factor(meltd_w1[[TRANK]], new.order = target) #### ---- make_fig_n_v_h, echo=FALSE, eval=FALSE------------------------------------ family_pal <- c("#009E73", #1 "#56B4E9", #2 "#CC79A7", #3 "#0072B2", #4 "#D55E00", #5 "#B6DBFF", #6 "#E69F00", #7 "#F0E442", #8 "#323232", #9 "#7F7F7F") fig_w <- ggplot(meltd_w1, aes_string(x = "Sample", y = "Abundance", fill = TRANK, show.legend = FALSE), ordered = TRUE, xlab = "x-axis label", ylab = "y-axis label") fig_w <- fig_w + geom_bar(stat = "identity", position = position_stack(reverse = TRUE), width = 0.95) + coord_flip() + theme(aspect.ratio = 1/4) + theme(legend.position = "bottom") fig_w <- fig_w + scale_fill_manual(values = family_pal) fig_w <- fig_w + labs(x = "Environment", y = "Relative abundance (% total reads)") fig_w <- fig_w + theme(plot.margin = margin(0, 0.1, 0, 0.1, unit = "cm")) + theme(axis.text.x = element_text(angle = 0, hjust = 0.45, vjust = 1)) + theme(legend.key = element_rect(colour = "black")) fig_w <- fig_w + guides( fill = guide_legend(override.aes = list(colour = NULL), reverse = FALSE, title.position = "top", nrow = 3)) fig_w <- fig_w + theme(axis.line = element_line(colour = "black"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_rect(colour = "black", fill = NA, size = 1)) fig_w #ggsave("oxstate_by_taxa.png") #invisible(dev.off()) #### ---- echo=FALSE, warning=FALSE, fig.height=2---------------------------------- knitr::include_graphics("figures/16s-water/bar-norm-v-hypox.png") #### ---- hyp_v_norm_agg, eval=FALSE------------------------------------------------ hyp_v_norm <- aggregate(meltd_w1$Abundance, by = list(Rank = meltd_w1[[TRANK]], Sample = meltd_w1$Sample), FUN = sum) hyp_v_norm$x <- round(hyp_v_norm$x, digits = 4) hyp_v_norm <- spread(hyp_v_norm, Sample, x) #### ---- sample_summary_table_display2, echo=FALSE--------------------------------- datatable(hyp_v_norm, width = "100%", escape = FALSE, rownames = FALSE, caption = htmltools::tags$caption( style = 'caption-side: bottom; text-align: left;', 'Table: ', htmltools::em('Relative abundance (reads) of normoxic vs hypoxic samples by dominant taxa.')), elementId = "wewuidn73q2ceico4vyw", extensions = 'Buttons', options = list( scrollX = TRUE, dom = 'Brti', buttons = c('copy', 'csv', 'excel'), pageLength = 9 ) ) #%>% # formatRound(columns=c("Shannon", "InvSimpson"), digits=2) %>% # formatStyle(columns = colnames(merge_tab2), fontSize = '80%') #### ---- assign_rankB, echo=FALSE, eval=FALSE-------------------------------------- TRANK <- "Class" #### ---- calc_avg2B, warning=FALSE, echo=FALSE, eval=FALSE------------------------- ps_water_tm <- merge_samples(ps_water, "period") SD_BAR_tm <- merge_samples(sample_data(ps_water_AVG), "period") #### ---- calc_avg3B, warning=FALSE, echo=FALSE, eval=FALSE------------------------- mdata_phy_tm <- tax_glom(ps_water_tm, taxrank = TRANK, NArm = FALSE) mdata_phyrel_tm <- transform_sample_counts(mdata_phy_tm, function(x) x/sum(x)) meltd_tm <- psmelt(mdata_phyrel_tm) meltd_tm[[TRANK]] <- as.character(meltd_tm[[TRANK]]) #### ---- calc_avg4B, warning=FALSE, echo=FALSE, eval=FALSE------------------------- means_tm <- ddply(meltd_tm, ~get(TRANK), function(x) c(mean = mean(x$Abundance))) colnames(means_tm) <- c(TRANK, "mean") means_tm$mean <- round(means_tm$mean, digits = 5) taxa_means_tm <- means_tm[order(-means_tm$mean), ] # this orders in decending fashion taxa_means_tm <- format(taxa_means_tm, scientific = FALSE) # ditch the sci notation #### ---- calc_avg5B, warning=FALSE, echo=FALSE------------------------------------- datatable( taxa_means_tm, rownames = FALSE, width = "100%", colnames = c(TRANK, "mean"), caption = htmltools::tags$caption( style = "caption-side: bottom; text-align: left;", "Relative abundance by taxonomic rank. "), elementId = "zswbg7x7f8eqj526xuox", extensions = "Buttons", options = list(columnDefs = list(list( className = "dt-left", targets = "_all")), dom = "Blfrtip", buttons = c("csv", "copy"), scrollX = TRUE, scrollCollapse = TRUE, scrollY=TRUE, pageLength = 9, scroller=TRUE, lengthMenu = c(9, 25, 60))) #### ---- calc_avg6B, warning=FALSE, echo=FALSE, eval=FALSE------------------------- TAXAN <- 8 top_perc_tm <- top_n(taxa_means_tm, n = TAXAN, wt = mean) top_perc_tm$mean <- round(as.numeric(top_perc_tm$mean), digits = 5) min_top_perc_tm <- round(as.numeric(min(top_perc_tm$mean)), digits = 5) top_perc_tm_list <- top_perc_tm[,1] #### ---- make_other_d_v_a, echo=FALSE, eval=FALSE---------------------------------- Other_tm <- means_tm[means_tm$mean < min_top_perc_tm, ][[TRANK]] meltd_tm[meltd_tm[[TRANK]] %in% Other_tm, ][[TRANK]] <- "Other" samp_names_tm <- aggregate(meltd_tm$Abundance, by = list(meltd_tm$Sample), FUN = sum)[, 1] .e_tm <- environment() meltd_tm[, TRANK] <- factor(meltd_tm[, TRANK], sort(unique(meltd_tm[, TRANK]))) meltd_tm1 <- meltd_tm[order(meltd_tm[, TRANK]), ] target <- c("Cyanobacteriia", "Alphaproteobacteria", "Bacteroidia", "Gammaproteobacteria", "Thermoplasmata", "Phylum_Marinimicrobia_(SAR406_clade)", "Acidimicrobiia", "Campylobacteria", "Other") meltd_tm1[[TRANK]] <- reorder.factor(meltd_tm1[[TRANK]], new.order=target) #### ---- make_fig_d_v_a, echo=FALSE, layout="l-body-outset", eval=FALSE------------ family_pal <- c("#009E73", #1 "#56B4E9", #2 "#CC79A7", #3 "#0072B2", #4 "#D55E00", #5 "#B6DBFF", #6 "#E69F00", #7 "#F0E442", #8 "#323232", #9 "#7F7F7F") fig_tm <- ggplot(meltd_tm1, aes_string(x = "Sample", y = "Abundance", fill = TRANK, show.legend = FALSE), ordered = TRUE, xlab = "x-axis label", ylab = "y-axis label") fig_tm <- fig_tm + geom_bar(stat = "identity", position = position_stack(reverse = TRUE), width = 0.95) + coord_flip() + theme(aspect.ratio = 1/4) + theme(legend.position = "bottom") fig_tm <- fig_tm + scale_fill_manual(values = family_pal) fig_tm <- fig_tm + labs(x = "Environment", y = "Relative abundance (% total reads)", title = "Abundance of taxa before & after event") fig_tm <- fig_tm + theme(plot.margin = margin(0, 0.1, 0, 0.1, unit = "cm")) + theme(axis.text.x = element_text(angle = 0, hjust = 0.45, vjust = 1)) + theme(legend.key = element_rect(colour = "black")) fig_tm <- fig_tm + guides( fill = guide_legend(override.aes = list(colour = NULL), reverse = FALSE, title.position = "top", nrow = 3)) fig_tm <- fig_tm + theme(axis.line = element_line(colour = "black"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_rect(colour = "black", fill = NA, size = 1)) fig_tm #ggsave("period_by_taxa.png") #invisible(dev.off()) #### ---- echo=FALSE, warning=FALSE, fig.height=2, layout="l-body-outset"---------- knitr::include_graphics("figures/16s-water/bar-before-v-after.png") #### ---- before_v_after1, echo=FALSE, eval=FALSE----------------------------------- before_v_after <- aggregate(meltd_tm1$Abundance, by = list(Rank = meltd_tm1[[TRANK]], Sample = meltd_tm1$Sample), FUN = sum) before_v_after$x <- round(before_v_after$x, digits = 4) before_v_after <- spread(before_v_after, Sample, x) #### ---- before_v_after2, echo=FALSE----------------------------------------------- datatable(before_v_after, width = "100%", escape = FALSE, rownames = FALSE, colnames = c("Crawl Caye (after)", "Crawl Caye (during)", "Cayo Roldan (after)", "Cayo Roldan (during)"), caption = htmltools::tags$caption( style = 'caption-side: bottom; text-align: left;', 'Table: ', htmltools::em('Relative abundance (reads) of normoxic vs hypoxic samples by dominant taxa.')), elementId = "kqtr8k9xeqyk0sb1s4h4", extensions = 'Buttons', options = list( scrollX = TRUE, dom = 'Brti', buttons = c('copy', 'csv', 'excel'), pageLength = 9 ) ) #%>% # formatRound(columns=c("Shannon", "InvSimpson"), digits=2) %>% # formatStyle(columns = colnames(merge_tab2), fontSize = '80%') #### ---- echo=FALSE, eval=FALSE--------------------------------------------------- saveRDS(ps_water, "rdata/16s-water/ps_water.rds") saveRDS(ps_water_o, "rdata/16s-water/ps_water_o.rds") saveRDS(ps_water_ox, "rdata/16s-water/ps_water_ox.rds") save.image("rdata/16s-water/16s-water.rdata") ############################# ## ## No 4. Differentially Abundant ASVs ## ############################# #### ---- setup, include=FALSE------------------------------------------------------ knitr::opts_chunk$set(echo = TRUE) set.seed(0199) library(phyloseq); packageVersion("phyloseq") library(DT) library(ggplot2) library(Biostrings); packageVersion("Biostrings") library(dplyr) library(microbiome) library(tidyverse) library(data.table) library(plyr) library(stringr) require(gdata) library(labdsv) library(reshape) library(metacoder) library(naniar) options(scipen=999) knitr::opts_current$get(c( "cache", "cache.path", "cache.rebuild", "dependson", "autodep" )) #### ---- echo=FALSE--------------------------------------------------------------- ### NOTE: this should only be run AFTER the workflow is finished. ### This image is created at the END of this workflow and now read back in. ### Basically, this locks everything in place so nothing changes. Only code ### chunkc to display figures and tables are evaluated. Everything else is ### turned off. To actually run this you need to set this chunk to ### eval=FALSE and the rest to eval=TRUE load("rdata/16s-da-asvs/da_asvs_pipeline.rdata") #### ---- echo=FALSE, eval=FALSE--------------------------------------------------- remove(list = ls()) ps_water <- readRDS("rdata/16s-water/ps_water.rds") ps_water_o <- readRDS("rdata/16s-water/ps_water_o.rds") ps_water_ox <- readRDS("rdata/16s-water/ps_water_ox.rds") load("rdata/16s-water/16s-water.rdata") #### ---- idval_analysis, results='hide', eval=FALSE-------------------------------- set.seed(119) #rm(class, order, phylum, kingdom) #ls(all.names = TRUE) water_seq_table <- data.frame(otu_table(ps_water)) # Delete columns when sum == 0 Should not be any water_seq_table <- water_seq_table[, which(colSums(water_seq_table) != 0)] water_seq_table <- cbind(oxstate = c("normoxic","normoxic", "normoxic","normoxic", "normoxic","hypoxic", "hypoxic","hypoxic"), water_seq_table) iva <- indval(water_seq_table[,-1], water_seq_table[,1]) gr <- iva$maxcls[iva$pval <= 0.05] iv <- iva$indcls[iva$pval <= 0.05] pv <- iva$pval[iva$pval <= 0.05] fr <- apply(water_seq_table[,-1] > 0, 2, sum)[iva$pval <= 0.05] indvalsummary <- data.frame(group = gr, indval = iv, pvalue = pv, freq = fr) indvalsummary <- indvalsummary[order(indvalsummary$group, -indvalsummary$indval),] water_tax_table <- data.frame(tax_table(ps_water)) indvalsummary_tax <- merge(indvalsummary, water_tax_table, by = "row.names", all = TRUE) indvalsummary_tax <- indvalsummary_tax[!(is.na(indvalsummary_tax$group)),] lapply(indvalsummary_tax, class) class(indvalsummary_tax$group) = "character" indvalsummary_tax$group <- ifelse(indvalsummary_tax$group == "1", as.character("hypoxic"), as.character("normoxic")) #### ---- idval_tables, echo=FALSE, eval=FALSE-------------------------------------- water_seq_table_t <- water_seq_table water_seq_table_t[,1] <- NULL water_seq_table_t <- as.data.frame(t(water_seq_table_t)) water_seq_table_t$reads <- rowSums(water_seq_table_t) water_seq_table_t$norm <- rowSums(water_seq_table_t[1:5]) water_seq_table_t$hypo <- rowSums(water_seq_table_t[6:8]) water_seq_table_t[1:8] <- NULL water_seq_table_t <- tibble::rownames_to_column(water_seq_table_t, "ASV") names(indvalsummary_tax)[1] <- "ASV" full_indval_sum <- merge(water_seq_table_t, indvalsummary_tax, by = "ASV", all = FALSE) indvalsummary <- tibble::rownames_to_column(indvalsummary, "ASV") write.table(full_indval_sum, "tables/16s-da-asvs/indval_tax.txt", row.names = FALSE, sep = "\t", quote = FALSE) write.table(indvalsummary, "tables/16s-da-asvs/indval_summary.txt", row.names = FALSE, sep = "\t", quote = FALSE) #### ---- scg_tax, echo=FALSE, layout="l-page"-------------------------------------- full_indval_sum <- read.table("tables/16s-da-asvs/indval_tax.txt", header = TRUE) full_indval_sum_noseq <- full_indval_sum full_indval_sum_noseq[,c(15:16)] <- NULL datatable(colnames = c("ASV", "reads", "norm", "hypo", "group", "indval", "pval", "freq", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus"), full_indval_sum_noseq, rownames = FALSE, autoHideNavigation = TRUE, width = "100%", elementId = "twykw9v4dzmp1lymga92", caption = htmltools::tags$caption( style = "caption-side: bottom; text-align: left;", "Differentially Abundant ASVs assessed by Indicator Species Analysis (ISA)."), extensions = c("Buttons", "FixedColumns"), options = list(autoWidth = TRUE, columnDefs = list(list(className = "dt-left", targets = 0)), dom = "Blfrtip", buttons = list(list( extend = 'collection', buttons = c('csv', 'excel', 'copy'), text = 'Download')), pageLength = 10, lengthMenu = c(5, 10, 50, 160), scrollX = TRUE, scrollCollapse = FALSE, scrollY=FALSE, paging=TRUE, fixedColumns = list(leftColumns = 1, rightColumns = 0))) %>% formatRound(columns=c("indval", "pvalue"), digits = 3) %>% formatStyle(columns = colnames(full_indval_sum_noseq), fontSize = '80%') #### ---- export_da_fasta, echo=FALSE, eval=FALSE----------------------------------- # Create fasta file from tax_table table2format2 <- full_indval_sum table2format2_t <- table2format2[table2format2$reads >= 100 ,] #retain only the column with the sequences table2format_trim2 <- table2format2[, 16:15] table2format_trim2_t <- table2format2_t[, 16:15] #format fasta table2format_trim2$ASV_ID <- sub("ASV", ">ASV", table2format_trim2$ASV_ID) table2format_trim2_t$ASV_ID <- sub("ASV", ">ASV", table2format_trim2_t$ASV_ID) write.table(table2format_trim2, "tables/16s-da-asvs/idval_asv_all.fasta", sep = "\n", col.names = FALSE, row.names = FALSE, quote = FALSE, fileEncoding = "UTF-8") write.table(table2format_trim2_t, "tables/16s-da-asvs/idval_asv_100.fasta", sep = "\n", col.names = FALSE, row.names = FALSE, quote = FALSE, fileEncoding = "UTF-8") #### ---- echo=FALSE, eval=FALSE--------------------------------------------------- full_tally <- table(indvalsummary_tax$group) trim_tally <- table(table2format2$group) #### ---- eval=FALSE--------------------------------------------------------------- ps_water_100 <- prune_taxa(taxa_sums(ps_water) > 100, ps_water) ps_water_100 #### ---- echo=FALSE--------------------------------------------------------------- ps_water_100 #### ---- eval=FALSE--------------------------------------------------------------- data_tab <- as.data.frame(t(otu_table(ps_water_100))) data_tab <- data_tab %>% rownames_to_column("Group") write.table(data_tab, "16s-anvio/data.txt", sep = "\t", quote = FALSE, row.names = FALSE) #### ---- eval=FALSE--------------------------------------------------------------- data_tab2 <- as.data.frame(t(otu_table(ps_water_100))) total_reads2 <- cbind(data_tab2, total_reads = rowSums(data_tab2)) total_reads2[1:8] <- NULL tax_tab2 <- as.data.frame(tax_table(ps_water_100)) tax_tab2[7] <- NULL total_reads2 <- tibble::rownames_to_column(total_reads2, "Group") tax_tab2 <- tibble::rownames_to_column(tax_tab2, "Group") merge_tabX <- merge(total_reads2, tax_tab2, by = "Group", all = TRUE) drop_c <- c("reads", "norm", "hypo", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus") idval2 <- full_indval_sum_noseq[ , !(names(full_indval_sum_noseq) %in% drop_c)] colnames(idval2)[1:2] <- c("Group", "enriched") additional_layers <- merge(idval2, merge_tabX, by = "Group", all.y = TRUE) write.table(additional_layers, "16s-anvio/additional_layers.txt", sep = "\t", quote = FALSE, row.names = FALSE, na = "") #### ---- eval=FALSE--------------------------------------------------------------- additional_view <- merge_tab2 additional_view[2] <- NULL colnames(additional_view) <- c("id", "site", "oxstate", "period", "no_reads", "no_asvs", "Shannon", "InvSimpson") write.table(additional_view, "16s-anvio/additional_views.txt", sep = "\t", quote = FALSE, row.names = FALSE, na = "") #### ---- eval=FALSE--------------------------------------------------------------- # Make the table anvio_taxa <- ps_water glom_rank <- tax_glom(anvio_taxa, taxrank = 'Class') melt_rank <- psmelt(glom_rank) melt_rank$Class <- as.character(melt_rank$Class) rank_abundance <- aggregate(Abundance~Sample+Class, melt_rank, FUN = sum) rank_abundance <- as.data.frame(cast(rank_abundance, Sample ~ Class)) #rank_abundance <- as.data.frame(rank_abundance) rank_abundance <- tibble::remove_rownames(rank_abundance) rank_abundance <- tibble::column_to_rownames(rank_abundance, "Sample") # Reorder table column by summ layers <- rank_abundance[,names(sort(colSums(rank_abundance), decreasing = TRUE))] # Add the prefix layers <- layers %>% dplyr::rename_all(function(x) paste0("t_class!", x)) layers <- tibble::rownames_to_column (layers, "taxon") # save the table write.table(layers, "16s-anvio/tax_layers.txt", sep = "\t", quote = FALSE, row.names = FALSE, na = "") #### ---- eval=FALSE--------------------------------------------------------------- sam_tree <- read_file("16s-anvio/sample.tre") sam_tree <- gsub("[\r\n]", "", sam_tree) item_name <- c("euc_ward") data_type <- c("newick") data_value <- c(sam_tree) sam_tre_tab <- data.frame(item_name, data_type, data_value) library(janitor) sam_tre_tab %>% remove_empty("rows") write.table(sam_tre_tab, "16s-anvio/sample_tree_tab.txt", sep = "\t", quote = FALSE, row.names = FALSE, na = "") #### ---- export_fasta_anvio, eval=FALSE-------------------------------------------- # Create fasta file from tax_table anvio_tab <- tax_table(ps_water_100) #retain only the column with the sequences anvio_tab_trim <- anvio_tab[, 7] anvio_tab_trim_df <- data.frame(row.names(anvio_tab_trim), anvio_tab_trim) colnames(anvio_tab_trim_df) <- c("ASV_ID", "ASV_SEQ") #format fasta anvio_tab_trim_df$ASV_ID <- sub("ASV", ">ASV", anvio_tab_trim_df$ASV_ID) write.table(anvio_tab_trim_df, "16s-anvio/anvio.fasta", sep = "\r", col.names = FALSE, row.names = FALSE, quote = FALSE, fileEncoding = "UTF-8") #### ---- echo=FALSE, warning=FALSE, fig.height=2---------------------------------- knitr::include_graphics("figures/16s-da-asvs/before.png") #### ---- echo=FALSE, layout="l-page", warning=FALSE, fig.height=3----------------- knitr::include_graphics("figures/16s-da-asvs/after.png") #### ---- echo=FALSE, warning=FALSE, fig.height=2---------------------------------- knitr::include_graphics("figures/16s-da-asvs/cyano_example.png") #### ---- eval=FALSE--------------------------------------------------------------- ps_water_mc <- ps_water #### ---- add_var_cats, echo=FALSE, eval=FALSE-------------------------------------- ## USE This code if you want to start with NA taxa before rename ## First, we need to start with the original phyloseq object (`ps_water_o`) before we modified it in any way. We will copy it then add in metadata (oxstate and period) as before. This way we can have a phyloseq object specifically for metacoder. ps_water_mc <- ps_water_o sample_data(ps_water_mc)$oxstate <- c("normoxic", "normoxic", "normoxic", "normoxic", "normoxic", "hypoxic", "hypoxic", "hypoxic") sample_data(ps_water_mc)$period <- c("cc_after", "cc_during", "cc_during", "cc_during", "cr_after", "cr_during", "cr_during", "cr_during") #### ---- eval=FALSE--------------------------------------------------------------- ps_water_mc_100 <- prune_taxa(taxa_sums(ps_water_mc) > 100, ps_water_mc) ps_water_mc_10 <- prune_taxa(taxa_sums(ps_water_mc) > 10, ps_water_mc) total_reads_ps_water <- sum(readcount(ps_water_mc)) total_reads_ps_water_100 <- sum(readcount(ps_water_mc_100)) total_reads_ps_water_10 <- sum(readcount(ps_water_mc_10)) ps_obj <- ps_water_mc_100 #### ---- eval=FALSE--------------------------------------------------------------- #summarize_phyloseq(ps_water_o) #summarize_phyloseq(ps_water_mc) tax_tab1 <- as.data.frame(tax_table(ps_obj)) tax_tab1 <- tibble::rownames_to_column(tax_tab1, "otu_id") asv_tab1 <- as.data.frame(t(otu_table(ps_obj))) asv_tab1 <- tibble::rownames_to_column(asv_tab1, "otu_id") sam_tab1 <- data.frame(sample_data(ps_obj)) sam_tab1[1] <- NULL da_samp <- tibble::rownames_to_column(sam_tab1, "sample_id") water_mc <- merge(asv_tab1, tax_tab1, by="otu_id") water_mc$ASV_SEQ <- NULL #### ---- echo=FALSE, eval=FALSE--------------------------------------------------- #This code replaces all Genus names with `NA`. Comment out to keep names. # but it messes things up #gen_names <- water_mc$Genus #water_mc <- water_mc %>% replace_with_na_at(.vars = "Genus", # condition = ~.x %in% gen_names) #### ---- eval=FALSE--------------------------------------------------------------- obj <- parse_tax_data(water_mc, class_cols = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "ASV_ID" )) obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = da_samp$sample_id) obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = da_samp$oxstate, cols = da_samp$sample_id) #### ---- eval=FALSE--------------------------------------------------------------- obj$data$diff_table <- compare_groups(obj, data = "tax_abund", cols = da_samp$sample_id, groups = da_samp$oxstate) #### ---- echo=FALSE, eval=FALSE---------------------------------------------------- ### THIS is optional if you want to remove low abund taxa AFTER running the comparative stats obj_t <- obj obj_t$data$tax_data <- zero_low_counts(obj_t, data = "tax_data", min_count = 100, use_total = TRUE) no_reads <- rowSums(obj_t$data$tax_data[, da_samp$sample_id]) == 0 sum(no_reads) obj_t <- filter_obs(obj_t, data = "tax_data", ! no_reads, drop_taxa = TRUE) #### ---- echo=FALSE, eval=FALSE---------------------------------------------------- ### THIS is optional if you want to run fdr obj$data$diff_table$wilcox_p_value <- p.adjust(obj$data$diff_table$wilcox_p_value, method = "fdr") range(obj$data$diff_table$wilcox_p_value, finite = TRUE) range(obj$data$diff_table$log2_median_ratio[is.finite(obj$data$diff_table$log2_median_ratio)]) #### ---- eval=FALSE--------------------------------------------------------------- #set.seed(1999) obj %>% filter_taxa(taxon_names %in% c("Bacteria"), subtaxa = TRUE) %>% # filter_taxa(taxon_ranks == "o", supertaxa = TRUE) %>% # subset to the order rank # filter_taxa(taxon_names %in% c("Proteobacteria", "Bacteroidia", "Archaea"), subtaxa = TRUE, invert = TRUE) # to remove taxa heat_tree( node_label = taxon_names, node_size = n_obs, # node_size_range = c(0.01, 0.05), node_label_size_range = c(0.008, 0.04), node_color = log2_median_ratio, node_color_interval = c(-5.5, 4), edge_color_interval = c(-5.5, 4), node_color_trans = "area", node_color_range = c("#D55E00", "gray", "#0072B2"), node_size_axis_label = "OTU count", node_color_axis_label = "Log 2 median ratio", layout = "da", initial_layout = "re", overlap_avoidance = 2, output_file = "figures/16s-da-asvs/differential_heat_tree.png") #### ---- echo=FALSE, warning=FALSE, fig.height=2, layout='l-body-outset'---------- knitr::include_graphics("figures/16s-da-asvs/differential_heat_tree.png") #### ---- eval=FALSE---------------------------------------------------------------- ## This is for the example above #set.seed(10) obj %>% filter_taxa(taxon_names %in% c("Cyanobacteria"), subtaxa = TRUE) %>% heat_tree(node_label = taxon_names, node_size = n_obs, node_size_range = c(0.02, 0.03), node_label_size_range = c(0.02, 0.04), node_color = log2_median_ratio, node_color_interval = c(-5, 5), edge_color_interval = c(-5, 5), node_color_range = c("#D55E00", "gray", "#0072B2"), tree_label = taxon_names, initial_layout = "re", layout = "da", node_color_axis_label = "Sum of root reads", node_size_axis_label = "Number of OTUs", output_file = "figures/16s-da-asvs/cyano_example.svg") #### ---- eval=FALSE--------------------------------------------------------------- #set.seed(10) obj %>% filter_taxa(taxon_names %in% c("Proteobacteria", "Bacteroidia"), subtaxa = TRUE) %>% heat_tree(node_label = taxon_names, node_size = n_obs, node_color = log2_median_ratio, node_color_range = c("#D55E00", "gray", "#0072B2"), node_color_interval = c(-3, 3), tree_label = taxon_names, initial_layout = "re", layout = "da", node_color_axis_label = "Sum of root reads", node_size_axis_label = "Number of OTUs", output_file = "figures/16s-da-asvs/by_major_taxa.png") #### ---- echo=FALSE, warning=FALSE, fig.height=2, layout='l-body-outset'---------- knitr::include_graphics("figures/16s-da-asvs/by_major_taxa.png") #### ---- eval=FALSE--------------------------------------------------------------- set.seed(1) obj$data$diff_table2 <- compare_groups(obj, data = "tax_abund", cols = da_samp$sample_id, # What columns of sample data to use groups = da_samp$period) # What category each sample is assigned to heat_tree_matrix(obj, data = "diff_table2", node_size = n_obs, # n_obs is a function that calculates, in this case, the number of OTUs per taxon node_label = taxon_names, node_color = log2_median_ratio, # A column from `obj$data$diff_table` node_color_range = diverging_palette(), # The built-in palette for diverging data node_color_trans = "linear", # The default is scaled by circle area node_color_interval = c(-3, 3), # The range of `log2_median_ratio` to display edge_color_interval = c(-3, 3), # The range of `log2_median_ratio` to display node_size_axis_label = "Number of OTUs", node_color_axis_label = "Log2 ratio median proportions", layout = "davidson-harel", # The primary layout algorithm initial_layout = "reingold-tilford", # The layout algorithm that initializes node locations output_file = "figures/16s-da-asvs/differential_heat_tree2.png") # Saves the plot as a png file #### ---- echo=FALSE, warning=FALSE, fig.height=2, layout='l-body-outset'---------- knitr::include_graphics("figures/16s-da-asvs/differential_heat_tree2.png") #### ---- echo=FALSE, eval=FALSE--------------------------------------------------- save.image("rdata/16s-da-asvs/da_asvs_pipeline.rdata") #### ---- calc_avg2, warning=FALSE, echo=FALSE, eval=FALSE-------------------------- ### Script for DESeq NOT FINISHED library(DESeq2) ds2 <- phyloseq_to_deseq2(ps_water, ~ oxstate) dds <- DESeq(ds2) res <- results(dds) deseq.results <- as.data.frame(res) df <- deseq.results df$taxon <- rownames(df) df <- df %>% arrange(log2FoldChange, padj) write.table(res, "res.txt") ########################### ps_water_avg <- transform_sample_counts(ps_water, function(x) x/sum(x)) t(otu_table(ps_water_avg)) pseq.compositional <- microbiome::transform(ps_water, "compositional") t(otu_table(pseq.compositional)) mergedGP <- merge_samples(ps_water_avg, "oxstate") SD <- merge_samples(sample_data(ps_water_avg), "oxstate") OTUnames10 = names(sort(taxa_sums(ps_water), TRUE)[1:100]) GP10 = prune_taxa(OTUnames10, ps_water) t(otu_table(GP10)) mGP10 = prune_taxa(OTUnames10, mergedGP) ocean_samples = sample_names(subset(sample_data(GP), SampleType=="Ocean")) print(ocean_samples) otu_table(mergedGP)[, hypoxic] sample_names(mergedGP) identical(SD, sample_data(mergedGP)) otu.relative <- abundances(ps_water, "compositional") ps_water_ox <- merge_samples(ps_water_AVG, "oxstate") SD_BAR_w <- merge_samples(sample_data(ps_water_AVG), "oxstate") head(otu_table(ps_water_ox)) ox_otu <- as.data.frame(t(otu_table(ps_water_ox))) ox_tax <- as.data.frame(tax_table(ps_water_ox)) ox_asv_tab <- merge(ox_otu, ox_tax,by = 0, all = TRUE) ox_asv_tab <- ox_asv_tab[,-1] ox_asv_tab <- ox_asv_tab[,c(9,1,2,3,4,5,6,7,8)] transform_sample_counts(ps_water_ox, function(x) x/sum(x))