# Please note this is all the raw R code pulled from all the `.Rmd` files. 
# This code is for the 16s rRNA data only since the metagenomic analyses are a mix
# of anvio commands and R-code

# Use at your own risk as this has not been tested independent of the Rmarkdown 
# workflows. In other words, the code works in the Rmarkdown format but the 
# complete pipeline has not been tested using just this code. I used `knitr::purl()` 
# to pull out the R code from the Rmarkdown file. I did this for you in case you 
# wanted the code and not hear me drone on about colors or zoomable figures. 

# Commands that are commented out are things I tried that I could never get to work. 
# Any line that starts like this: `## ----` is the code chuck name and details. 


#############################
##
## Front Page
##
#############################


#### ---- setup, include=FALSE------------------------------------------------------
knitr::opts_chunk$set(echo = FALSE)
library("tm")
library("SnowballC")
library("wordcloud")
library("RColorBrewer")
library("wordcloud2")
library("webshot")
library("htmlwidgets")
library(dplyr)
library(leaflet)
library(htmlwidgets)


#### ----  echo=FALSE, warning=FALSE, layout='l-screen'-----------------------------
knitr::include_graphics("figures/index/banner.png")


#### ----  layout='l-screen-inset shaded', fig.height=5.5---------------------------

#stri_icon <- makeIcon(
#  iconUrl = "assets/stri_logo.png",
#  iconWidth = 38, iconHeight = 38
#  iconAnchorX = 22, iconAnchorY = 94,
#)

leaflet() %>%
  setView(-82.235052, 9.277137, zoom = 12) %>%
  addTiles() %>% 
  addScaleBar(position = "topright") %>% 
#  addMiniMap(position = "topright", zoomLevelOffset = -5) %>%
  addCircleMarkers(lng = c(-82.334720, -82.131119), 
             lat = c(9.219640, 9.248433), radius = 22.5,
             label = c("Cayo Roldan (impacted)", "Coral Caye (control)"),
             labelOptions = labelOptions(textOnly = TRUE, 
                                         noHide = TRUE, 
                                         direction = "bottom", offset = c(6, 12), 
                                         style = list("font-size" = "14px" , 
                                                      "font-style" = "bold")),
             color = c("#141414", "#141414"), 
             fillColor = c("#D55E00", "#0072B2"), 
             stroke = TRUE, fillOpacity = 0.9, weight = 10) %>%
  addCircleMarkers(lng = -82.256463, 
             lat = 9.352402, 
             label = "STRI",
             labelOptions = labelOptions(textOnly = TRUE, 
                                         noHide = TRUE, 
                                         direction = "left", offset = c(-20, 1), 
                                         style = list("font-size" = "20px" , 
                                                      "font-style" = "bold")),
             color = "#0072B2", 
             fillColor = "#F0E442", 
             stroke = TRUE, fillOpacity = 0.95, weight = 5,
             opacity = 0.95) 


#### ----  echo=FALSE, eval=FALSE---------------------------------------------------
filePath <- "files/main.txt"
text <- readLines(filePath)
docs <- Corpus(VectorSource(text))
inspect(docs)
toSpace <- content_transformer(function (x , pattern ) gsub(pattern, " ", x))
docs <- tm_map(docs, toSpace, "/")
docs <- tm_map(docs, toSpace, "@")
docs <- tm_map(docs, toSpace, "\\|")


#### ----  echo=FALSE, eval=FALSE---------------------------------------------------
# Convert the text to lower case
docs <- tm_map(docs, content_transformer(tolower))
# Remove numbers
docs <- tm_map(docs, removeNumbers)
# Remove english common stopwords
docs <- tm_map(docs, removeWords, stopwords("english"))
# Remove your own stop word
# specify your stopwords as a character vector
docs <- tm_map(docs, removeWords, c("used", "based", "samples",
                                    "using", "sample", "one",
                                    "two", "site", "within", "fig"))
# Remove punctuations
docs <- tm_map(docs, removePunctuation)
# Eliminate extra white spaces
docs <- tm_map(docs, stripWhitespace)
# Text stemming
docs <- tm_map(docs, stemDocument)


#### ----  echo=FALSE, eval=FALSE---------------------------------------------------
dtm <- TermDocumentMatrix(docs)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing=TRUE)
d <- data.frame(word = names(v),freq = v)
head(d, 200)


#### ----  echo=FALSE, eval=FALSE---------------------------------------------------
set.seed(1234)
d1 <- d %>% filter(freq > 1)

#wordcloud(words = d$word, freq = d$freq, min.freq = 1, scale=c(2,.5),
#          max.words=200, random.order=FALSE, rot.per=0.35,
#          colors=brewer.pal(8, "Dark2"))
wc <- wordcloud2(data = d1, size = 1.1, color = 'random-dark', fontFamily = "serif", gridSize = 0)
saveWidget(wc, "tmp.html", selfcontained = F)
webshot("tmp.html", "wordcloud.png", delay = 10, vwidth = 1080, vheight = 480)


#### ----  echo=FALSE, warning=FALSE, fig.height=1.5, layout='l-screen'-------------
knitr::include_graphics("figures/index/wordcloud.png")

#############################
##
## Site Overview
##
#############################

#### ---- setup, include=FALSE------------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(DT)
library(ggplot2)
library(scales)
library(magick)
library(gridExtra)
library(grid)
options(scipen=999)


#### ---- color_121,  layout="l-body-outset", comment='', include=FALSE-------------
# Wong palette
wong_pal <- c("#009E73", "#D55E00", "#F0E442",
                "#CC79A7", "#56B4E9", "#E69F00",
                "#0072B2", "#7F7F7F", "#000000")
palette(wong_pal)
gs1 <- lapply(wong_pal, function(ii, wong_pal)
  grobTree(
    rectGrob(
      gp = gpar(fill = ii)),
    textGrob(ii, gp = gpar(col = "white", fontsize = 10))))
gs1 <- grid.arrange(grobs = gs1, ncol = 3,
               top = "8-COLOR PALETTE", right = "")

# 12 color palette
twelve_pal <- c("#323232", "#BF3465", "#50B29E", "#D9D9D9",
                 "#731683", "#1C6CCC", "#21BCFF", "#DFA5E5",
                 "#874310", "#DB6D1B", "#B8CE17", "#F4E345")
palette(twelve_pal)
gs2 <- lapply(twelve_pal, function(ii, twelve_pal)
  grobTree(
    rectGrob(
      gp = gpar(fill = ii)),
    textGrob(ii, gp = gpar(col = "white", fontsize = 10))))
gs2 <- grid.arrange(grobs = gs2, ncol = 4,
               top = "12-COLOR PALETTE", left = "")


#### ---- color_8,  layout="l-body-outset", comment='', echo=FALSE, fig.height=3----
grid.arrange(gs1, gs2, ncol = 2)


#############################
##
## Field Analyses
##
#############################


#### ---- setup, include = FALSE----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
rm(list = ls())
library(lme4)
library(lmerTest)
library(car)
library(rcompanion)
library(MASS)
library(emmeans)
library(nlme)
library(ggplot2)
library(plyr)
library(RColorBrewer)
library(wesanderson)
library(reshape2) # restructuring the data
library(cowplot)
library(scales)
library(tidyr)
library(dplyr)
library(patchwork)


#### ---- --------------------------------------------------------------------------
data <- read.table("tables/field/coralhypoxia.csv", header = T, sep = ",")
attach(data)


#### ---- --------------------------------------------------------------------------
plotNormalHistogram(yield, main = "PAM", xlab = "Yield")


#### ---- --------------------------------------------------------------------------
lmepam <- lmer(yield ~ site + (1|colony), REML = FALSE, na.action = na.exclude)
res1 <- resid(lmepam)


#### ----  eval=FALSE, include=FALSE------------------------------------------------
hist(res1, main = "PAM", xlab = "Residuals")
plotNormalHistogram(res1)


#### ---- --------------------------------------------------------------------------
plotNormalHistogram(res1, main = "PAM", xlab = "Residuals")


#### ---- --------------------------------------------------------------------------
pam.shapiro <- shapiro.test(res1)
print(pam.shapiro)  


#### ---- --------------------------------------------------------------------------
qqnorm(res1, ylab = "Sample Quantiles for PAM")
qqline(res1, col = "red")


#### ---- --------------------------------------------------------------------------
leveneTest(yield ~ as.factor(site), data = data)
bartlett.test(x = yield, g = site)


#### ---- --------------------------------------------------------------------------
pam1 <- lmer(yield ~ site + (1|colony/frag), data = data, 
             REML = FALSE, na.action = na.exclude)
ranova(pam1)


#### ---- --------------------------------------------------------------------------
pam2 <- lmer(yield ~ site + (1|colony), data = data, 
             REML = FALSE, na.action = na.exclude)
ranova(pam2)


#### ---- --------------------------------------------------------------------------
mod.AIC <- AIC(pam1, pam2) 
mod.AIC 


#### ---- --------------------------------------------------------------------------
finalpam <- lmer(yield ~ site + (1|colony), 
                 data = data, 
                 REML = TRUE, na.action = na.exclude)


#### ---- --------------------------------------------------------------------------
finalrandpam <- ranova(finalpam)
pam.table <- anova(finalpam, type = 2)


#### ---- --------------------------------------------------------------------------
pam.table


#### ---- --------------------------------------------------------------------------
finalrandpam


#### ---- --------------------------------------------------------------------------
attach(data)
plotNormalHistogram(zoox, main = "Zooxanthellae", xlab = "Counts")


#### ---- --------------------------------------------------------------------------
lmezoox <- lmer(zoox ~ site + (1|colony), REML = FALSE, na.action = na.exclude)
res2 <- resid(lmezoox)


#### ----  include=FALSE, eval = FALSE----------------------------------------------
hist(res2, main = "Zoox", xlab = "Residuals")
plotNormalHistogram(res2, main = "Zooxanthellae", xlab = "Residuals")


#### ---- --------------------------------------------------------------------------
plotNormalHistogram(res2, main = "Zooxanthellae", xlab = "Residuals")


#### ---- --------------------------------------------------------------------------
zoox.shapiro <- shapiro.test(res2) #runs a normality test on residuals
print(zoox.shapiro) # null = normally distrubuted (P<0.05 = non-normal


#### ---- --------------------------------------------------------------------------
qqnorm(res2, ylab = "Sample Quantiles for Zoox")
qqline(res2, col = "red")


#### ---- --------------------------------------------------------------------------
leveneTest(zoox~ as.factor(site),data = data) # p = 0.0007486


#### ---- --------------------------------------------------------------------------
data$zoox_log = log(zoox)
attach(data)
colnames(data)


#### ---- --------------------------------------------------------------------------
plotNormalHistogram(zoox_log, main = "Zooxanthellae", xlab = "log (counts)")


#### ---- --------------------------------------------------------------------------
lmezoox_log <- lmer(zoox_log ~ site + (1|colony), 
                    REML = FALSE, 
                    na.action = na.exclude)
res3 <- resid(lmezoox_log)


#### ----  include=FALSE, eval = FALSE----------------------------------------------
hist(res3, main = "zoox log", xlab = "Residuals")
plotNormalHistogram(res3, main = "Zooxanthellae log", xlab = "Residuals")


#### ---- --------------------------------------------------------------------------
plotNormalHistogram(res3, main = "Zooxanthellae log", xlab = "Residuals")


#### ---- --------------------------------------------------------------------------
zooxlog.shapiro <- shapiro.test(res3)
print(zooxlog.shapiro)


#### ---- --------------------------------------------------------------------------
qqnorm(res3, ylab = "Sample Quantiles for Zooxlog")
qqline(res3, col = "red")


#### ---- --------------------------------------------------------------------------
leveneTest(zoox_log ~ as.factor(site), data = data)


#### ---- --------------------------------------------------------------------------
zoox1 <- lmer(zoox_log ~ site + (1|colony/frag), 
              data = data, 
              REML = FALSE, 
              na.action = na.exclude) #AIC 72.8756
ranova(zoox1)


#### ---- --------------------------------------------------------------------------
zoox2 <- lmer(zoox_log ~ site + (1|colony), 
              data = data, 
              REML = FALSE, 
              na.action = na.exclude)
ranova(zoox2)
mod.AIC <- AIC(zoox1, zoox2) 
mod.AIC


#### ---- --------------------------------------------------------------------------
finalzoox <- lmer(zoox_log ~ site + (1|colony), 
                  data = data, 
                  REML = TRUE, 
                  na.action = na.exclude)


#### ---- --------------------------------------------------------------------------
finalrandzoox <- ranova(finalzoox)
zoox.table <- anova(finalzoox, type = 2) #ANOVA table
zoox.table
finalrandzoox


#### ---- --------------------------------------------------------------------------
plotNormalHistogram(chla, main = "Chlorophyll a", xlab = "Chlorophyll")


#### ---- --------------------------------------------------------------------------
lmechla <- lmer(chla ~ site + (1|colony), 
                REML = FALSE, 
                na.action = na.exclude)
res4 <- resid(lmechla)


#### ----  include=FALSE, eval=FALSE------------------------------------------------
hist(res4, main = "Chla", xlab = "Residuals")
plotNormalHistogram(res4, main = "Chlorophyll a", xlab = "Residuals")


#### ---- --------------------------------------------------------------------------
plotNormalHistogram(res4, main = "Chlorophyll a", xlab = "Residuals")


#### ---- --------------------------------------------------------------------------
chla.shapiro <- shapiro.test(res4)
print(chla.shapiro) 


#### ---- --------------------------------------------------------------------------
qqnorm(res4, ylab = "Sample Quantiles for Zoox")
qqline(res4, col = "red")


#### ---- --------------------------------------------------------------------------
leveneTest(chla ~ as.factor(site), data = data) # p = 0.01606


#### ---- --------------------------------------------------------------------------
data$chla_log = log(chla)


#### ---- --------------------------------------------------------------------------
head(data)
attach(data)
colnames(data)


#### ---- --------------------------------------------------------------------------
plotNormalHistogram(chla_log, main = "Chlorophyll a", xlab = "log (Chlorophyll)")


#### ---- --------------------------------------------------------------------------
lmechla_log <- lmer(chla_log ~ site + (1|colony), 
                    REML = FALSE, 
                    na.action = na.exclude)
res7 <- resid(lmechla_log)


#### ----  include=FALSE, eval = FALSE----------------------------------------------
hist(res7, main = "chla log", xlab = "Residuals")
plotNormalHistogram(res7, main = "Chlorophyll a", xlab = "Residuals")


#### ---- --------------------------------------------------------------------------
plotNormalHistogram(res7, main = "Chlorophyll a", xlab = "Residuals")


#### ---- --------------------------------------------------------------------------
chlalog.shapiro <- shapiro.test(res7) #runs a normality test on residuals
print(chlalog.shapiro) # null = normally distrubuted (P<0.05 = non-normal


#### ---- --------------------------------------------------------------------------
qqnorm(res7, ylab = "Sample Quantiles for chlalog")
qqline(res7, col = "red")


#### ---- --------------------------------------------------------------------------
leveneTest(chla_log ~ as.factor(site), data = data) # p = 0.7487


#### ---- --------------------------------------------------------------------------
chla_log1 <- lmer(chla_log ~ site+(1|colony/frag), 
                  data = data,
                  REML = FALSE, 
                  na.action = na.exclude) #AIC 55.11567
ranova(chla_log1)


#### ---- --------------------------------------------------------------------------
chla_log2 <- lmer(chla_log ~ site + (1|colony), 
                  data = data, 
                  REML = FALSE, 
                  na.action = na.exclude)
ranova(chla_log2)

mod.AIC <- AIC(chla_log1, chla_log2) 
mod.AIC


#### ---- --------------------------------------------------------------------------
finalchla_log <- lmer(chla_log ~ site + (1|colony), 
                      data = data, 
                      REML = TRUE, 
                      na.action = na.exclude)


#### ---- --------------------------------------------------------------------------
finalchla_logrand <- ranova(finalchla_log)
chla_log.table <- anova(finalchla_log, type = 2) #ANOVA table
chla_log.table
finalchla_logrand


#### ---- --------------------------------------------------------------------------
plotNormalHistogram(chlc, main = "Chlorophyll c", xlab = "Chlorophyll")


#### ---- --------------------------------------------------------------------------
lmechlc <- lmer(chlc ~ site + (1|colony), 
                REML = FALSE, 
                na.action = na.exclude)
res5 <- resid(lmechlc)


#### ----  eval=FALSE, include=FALSE------------------------------------------------
hist(res5,main = "Chlorophyll c", xlab = "Residuals")
plotNormalHistogram(res5, main = "Chlorophyll c", xlab = "Residuals")


#### ---- --------------------------------------------------------------------------
plotNormalHistogram(res5, main = "Chlorophyll c", xlab = "Residuals")


#### ---- --------------------------------------------------------------------------
chlc.shapiro <- shapiro.test(res5)
print(chlc.shapiro)


#### ---- --------------------------------------------------------------------------
qqnorm(res5, ylab = "Sample Quantiles for Zoox")
qqline(res5, col = "red")


#### ---- --------------------------------------------------------------------------
leveneTest(chlc ~ as.factor(site), data = data) # p = 0.01316


#### ---- --------------------------------------------------------------------------
data$chlc_log <- log(chlc)
attach(data)
colnames(data)


#### ---- --------------------------------------------------------------------------
plotNormalHistogram(data$chlc_log, main = "Chlorophyll c (log)", xlab = "log (Chlorophyll)")


#### ---- --------------------------------------------------------------------------
lmechlc_log <- lmer(chlc_log ~ site + (1|colony), 
                    REML = FALSE, 
                    na.action = na.exclude)
res6 <- resid(lmechlc_log)


#### ----  include=FALSE, eval=FALSE------------------------------------------------
hist(res6, main = "Chlorophyll c log", xlab = "Residuals")
plotNormalHistogram(res6, main = "Chlorophyll c log", xlab = "Residuals")


#### ---- --------------------------------------------------------------------------
plotNormalHistogram(res6, main = "Chlorophyll c (log)", xlab = "Residuals")


#### ---- --------------------------------------------------------------------------
chlclog.shapiro <- shapiro.test(res6)
print(chlclog.shapiro)


#### ---- --------------------------------------------------------------------------
qqnorm(res6, ylab = "Sample Quantiles for chlclog")
qqline(res6, col = "red")


#### ---- --------------------------------------------------------------------------
leveneTest(chlc_log ~ as.factor(site), data = data) # p = 0.884


#### ---- --------------------------------------------------------------------------
chlc_log1 <- lmer(chlc_log ~ site + (1|colony/frag), 
                  data = data, 
                  REML = FALSE, 
                  na.action = na.exclude) #AIC 64.52399
ranova(chlc_log1)


#### ---- --------------------------------------------------------------------------
chlc_log2 <- lmer(chlc_log ~ site+(1|colony), 
                  data = data, 
                  REML = FALSE, 
                  na.action = na.exclude)
ranova(chlc_log2)

mod.AIC <- AIC(chlc_log1, chlc_log2) 
mod.AIC


#### ---- --------------------------------------------------------------------------
finalchlc_log <- lmer(chlc_log ~ site + (1|colony), 
                      data = data, 
                      REML = TRUE, 
                      na.action = na.exclude)


#### ---- --------------------------------------------------------------------------
finalchlc_logrand <- ranova(finalchlc_log)
chlc_log.table <- anova(finalchlc_log, type = 2) #ANOVA table
chlc_log.table
finalchlc_logrand


#### ---- --------------------------------------------------------------------------
all <- read.table("tables/field/coralhypoxia.csv", header = T, sep = ",")
attach(all)


#### ---- --------------------------------------------------------------------------
means.all <- ddply(all, c("site", "colony"), summarise,
                 N    = sum(!is.na(yield)),
                 yield_mean = mean(yield, na.rm = TRUE),
                 zoox_mean = mean(zoox, na.rm = TRUE),
                 chla_mean = mean(chla, na.rm = TRUE),
                 chlc_mean = mean(chlc, na.rm = TRUE),
                 sd_yield = sd(yield),
                 sd_zoox = sd(zoox),
                 sd_chla = sd(chla),
                 sd_chlc = sd(chlc),
                 yield_se = (sd(yield)/sqrt(N)),
                 zoox_se = (sd(zoox)/sqrt(N)),
                 chla_se = (sd(chla)/sqrt(N)),
                 chlc_se = (sd(chlc)/sqrt(N)))
write.csv(means.all, file = "tables/field/allcoralmeans.csv", 
          row.names = FALSE)


#### ---- --------------------------------------------------------------------------
ggplot(all,aes(x = site, y = yield)) +
  geom_boxplot() +
  labs(x = "Site", y='Maximum Quantum Yield') + 
  theme_classic() +
  theme(text = element_text(size=14)) +
  theme(axis.text.x = element_text(colour = "black", angle = 0, hjust = 1),
        axis.text.y = element_text(colour = "black")) +
  scale_y_continuous(expand = c(0,0),
                     limits = c(0.4,0.75))


#### ---- --------------------------------------------------------------------------
ggplot(all,aes(x=site, y=yield, color=colony)) +
  geom_point(size=3) + 
  labs(x = "Site", y = 'Maximum Quantum Yield') + 
  theme_classic() +
  theme(text = element_text(size=14)) +
  theme(axis.text.x = element_text(colour="black", angle=0, hjust=1),
        axis.text.y = element_text(colour="black")) +
  scale_y_continuous(expand = c(0,0),
                     limits = c(0.4,0.75))


#### ---- --------------------------------------------------------------------------
ggplot(means.all,aes(x = site, y = yield_mean,fill = site)) +
  geom_boxplot() +
  labs(x = "Site", y = 'Maximum Quantum Yield') + 
  theme_classic() +
  scale_fill_manual(values = c("steelblue","darkred")) +
  theme(text = element_text(size = 14)) + 
  theme(axis.text.x = element_text(colour = "black", angle = 0, hjust = 1),
        axis.text.y = element_text(colour = "black")) +
  scale_y_continuous(expand = c(0,0),
                     limits = c(0.4,0.75)) 


#### ---- --------------------------------------------------------------------------
jitter <- position_jitter(width = 0.1)  
yield_plot <- ggplot(means.all, aes(x = site, y = yield_mean))


#### ---- --------------------------------------------------------------------------
yp <- yield_plot+
  geom_boxplot(color="black",
               aes(fill=site),
               width=0.4) +
  scale_fill_manual(values=c("skyblue3","coral3"), 
                    name= NULL, labels=c("Normoxic","Hypoxic")) +
  geom_point(size=2, position=jitter) +
  labs(x="Site",y="Maximum Quantum Yield") +
  theme_classic() +
  theme(legend.position = c(0.85,0.85),
        legend.title= element_text(color="transparent"),
        axis.text.x = element_text(colour="black"), 
        axis.text.y = element_text(colour="black"),
        axis.title.y = element_text(margin = margin(t=0, r=10,b=0,l=0)),
        panel.border = element_rect(colour = "black", fill=NA, size=.5)) +
  theme(text = element_text(size=12)) +
  scale_x_discrete(labels=c("Coral" = "Coral Cay","Roldan" = "Cayo Roldan")) +
  scale_y_continuous(expand = c(0,0), limits = c(0.5,0.7))
yp


#### ---- --------------------------------------------------------------------------
yield_zoox <- ggplot(means.all, aes(x=site, y=zoox_mean))
zp <- yield_zoox+
  geom_boxplot(color="black",
               aes(fill=site),
               width=0.4) +
  scale_fill_manual(values=c("skyblue3","coral3"), 
                    name= NULL, labels=c("Normoxic","Hypoxic")) +
  geom_point(size=2, position=jitter) +
  labs(x="Site",
       y=("Endosymbiont Density (cells cm^-2)")) +
  theme_classic() +
  theme(legend.position = "none", #
        axis.text.x = element_text(colour="black"),
        axis.text.y = element_text(colour="black"),
        axis.title.y = element_text(margin = margin(t=0, r=10,b=0,l=0)),
        panel.border = element_rect(colour = "black", fill=NA, size=.5)) +
  theme(text = element_text(size=12)) +
  scale_x_discrete(labels=c("Coral" = "Coral Cay","Roldan" = "Cayo Roldan")) +
  scale_y_continuous(labels = scientific,
                     breaks=seq(0,1.5E6, 4.5E5),
                      expand = c(0,0), 
                     limits = c(0,1.5E6))
zp  


#### ---- --------------------------------------------------------------------------
pigs <- tidyr::gather(means.all, pigment, concentration,chla_mean, chlc_mean)
pigments <- dplyr::select(pigs,site,colony,pigment,concentration)


#### ---- --------------------------------------------------------------------------
pig_plot <- ggplot(pigments,
                     aes(x=pigment, y=concentration, fill=site))


#### ---- --------------------------------------------------------------------------
pig_plot+
  geom_boxplot(color="black") +
  scale_fill_manual(values=c("skyblue3","coral3"), 
                    name= NULL, labels=c("Normoxic","Hypoxic"))


#### ---- --------------------------------------------------------------------------
jitter2 <- position_jitter(width=0.05)  
chla_plot <- ggplot(means.all,
                     aes(x=site, y=chla_mean))


#### ---- --------------------------------------------------------------------------
cp <- chla_plot+
  geom_boxplot(color="black",
               aes(fill=site),
               width=0.4) +
  scale_fill_manual(values=c("skyblue3","coral3"), name= NULL, labels=c("Normoxic","Hypoxic")) +
  geom_point(size=2, position=jitter) +
  labs(x="Site",
       y=("Chl a Concentration (ug cm^-2)")) +
  theme_classic() +
  theme(legend.position = "none",
        axis.text.x = element_text(colour="black"), 
        axis.text.y = element_text(colour="black"),
        axis.title.y = element_text(margin = margin(t=0, r=10,b=0,l=0)),
        panel.border = element_rect(colour = "black", fill=NA, size=.5)) +
  theme(text = element_text(size=12)) +
  scale_x_discrete(labels=c("Coral" = "Coral Cay","Roldan" = "Cayo Roldan")) +
  scale_y_continuous(expand = c(0,0), 
                     limits = c(0,6))
cp


#### ----  echo=FALSE, fig.height=4, layout='l-body-outset', eval=FALSE-------------
bar_plots <-  yp + zp + cp
bar_plots + plot_annotation(tag_levels = 'A',
  title = 'Heatmaps of taxa distribution',
  #subtitle = 'Top 30 taxa of non-rarefied data',
  caption = 'A) DETAILS,
  B) DETAILS,
  C) DETAILS'
)


#### ----  echo=FALSE, eval=FALSE---------------------------------------------------
plot_grid(yp, cp,zp, labels="AUTO", ncol=3, align="v")


#### ---- --------------------------------------------------------------------------
ypp <- yield_plot +
  geom_boxplot(color = "black", aes(fill = site), width = 0.4) +
  scale_fill_manual(values = c("skyblue3", "coral3"), name = NULL, 
                    labels = c("Normoxic", "Hypoxic")) +
  geom_point(size = 1, position = jitter) +
  labs(x = "Site", y = ("Maximum Quantum Yield")) +
  theme_classic() +
  theme(legend.position = "top", legend.direction = "vertical", #
        legend.title= element_text(color = "transparent"),
        axis.text.x = element_text(size = 7, colour="black"), 
        axis.text.y = element_text(size = 8, colour = "black"),
        axis.title.x = element_blank(),
        axis.title.y = element_text(size = 9, 
                                    margin = margin(t = 0, r = 10, 
                                                    b = 0, l = 0)),
        panel.border = element_rect(colour = "black", 
                                    fill = NA, size = 0.5)) +
  theme(text = element_text(size = 12)) +
  scale_x_discrete(labels=c("Coral" = "Coral Cay", "Roldan" = "Cayo Roldan")) +
  scale_y_continuous(expand = c(0,0), limits = c(0.5,0.7))


#### ---- --------------------------------------------------------------------------
yzield_zoox <- ggplot(means.all, aes(x = site, y = zoox_mean))

zpp <- yield_zoox+
  geom_boxplot(color="black",
               aes(fill=site),
               width=0.4) +
  scale_fill_manual(values=c("skyblue3","coral3"), name= NULL, 
                    labels=c("Normoxic","Hypoxic")) +
  geom_point(size=1, position=jitter) +
  labs(x = "Site", y=("Endosymbiont Density (cells cm^-2)")) +
  theme_classic() +
  theme(legend.position = "none",
        axis.text.x = element_text(size = 7, colour="black"), 
        axis.title.x = element_text(size = 12),
        axis.text.y = element_text(size = 8, colour="black"),
        axis.title.y = element_text(size = 9, margin = margin(t=0, r=10,b=0,l=0)),
        panel.border = element_rect(colour = "black", fill=NA, size=.5)) +
  theme(text = element_text(size=12)) +
  scale_x_discrete(labels=c("Coral" = "Coral Cay","Roldan" = "Cayo Roldan")) +
  scale_y_continuous(labels = scientific,
                     breaks=seq(0,1.5E6, 4.5E5),
                     expand = c(0,0), 
                     limits = c(0,1.5E6))


#### ---- --------------------------------------------------------------------------
chla_plot <- ggplot(means.all, aes(x = site, y = chla_mean))

cpp <- chla_plot +
  geom_boxplot(color="black", aes(fill=site), width=0.4) +
  scale_fill_manual(values = c("skyblue3","coral3"), 
                    name = NULL, labels = c("Normoxic", "Hypoxic")) +
  geom_point(size=1, position=jitter) + 
  labs(x = "Site", y = ("Chl a Concentration (ug cm^-2)")) +
  theme_classic() +
  theme(legend.position = "none",
        axis.text.x = element_text(size = 7, colour="black"), 
        axis.text.y = element_text(size = 8, colour="black"),
        axis.title.y = element_text(size = 9, 
                                    margin = margin(t = 0, r = 10, 
                                                    b = 0, l = 0)),
        axis.title.x = element_blank(),
        panel.border = element_rect(colour = "black", fill=NA, size=.5)) +
  theme(text = element_text(size=12)) +
  scale_x_discrete(labels=c("Coral" = "Coral Cay","Roldan" = "Cayo Roldan")) +
  scale_y_continuous(expand = c(0,0), 
                     limits = c(0,6))


#### ----  echo=FALSE, fig.height=5, layout='l-body-outset'-------------------------
bar_plots_ft <-  ypp + zpp + cpp
bar_plots_ft + plot_annotation(tag_levels = 'A',
  title = 'Coral response variables ',
  #subtitle = 'Top 30 taxa of non-rarefied data',
  caption = 'A) Pulse Amplitude Modulated (PAM),
  B) Zooxanthellae,
  C) Chlorophyll a (Chla)'
)


#### ----  eval=FALSE, echo=FALSE---------------------------------------------------
panel <- plot_grid(ypp, zpp,cpp, labels="AUTO", ncol=1, align="v")
panel


##########################################################
## 16s rRNA Workflow
##########################################################

#############################
##
## No 1. DADA2 Workflow
##
#############################

#### ---- setup, include=FALSE------------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
set.seed(911)
library(dada2); packageVersion("dada2")
library(ShortRead); packageVersion("ShortRead")
library(ggplot2); packageVersion("ggplot2")
library(rmarkdown)
library(DT)
library(gridExtra)
library(grid)
knitr::opts_current$get(c(
  "cache",
  "cache.path",
  "cache.rebuild",
  "dependson",
  "autodep"
))


#### ---- sample_seq_table, echo=FALSE, layout="l-body-outset", eval=FALSE----------
seq_table <- read.table("tables/16s-dada2/sample-seq-info-2019-16s.txt",
                        header = TRUE, sep = "\t")
datatable(seq_table, width = "100%",
          extensions = 'Buttons', options = list(
            scrollX = TRUE,
            dom = 'lfrtipB',
            buttons = c('copy', 'csv', 'excel'),
            pageLength = 8
            )
          )


cutadapt -g {F-ADAPTER} -G {R-ADAPTER} /

                        -o ${R1}.trimmed.fastq /

                        -p {R2}.trimmed.fastq /

                        ${R1} ${R2} /

                        --discard-untrimmed -e 0.12


#### ---- list_fastq_filesX, eval=FALSE---------------------------------------------
path <- "INPUT_FILES/"
head(list.files(path))


#### ---- dummy_1X, echo=FALSE------------------------------------------------------
c("WCC0_R1.trimmed.fastq", "WCC0_R2.trimmed.fastq", "WCC1_R1.trimmed.fastq",
   "WCC1_R2.trimmed.fastq", "WCC2_R1.trimmed.fastq", "WCC2_R2.trimmed.fastq")


#### ---- sort_files, eval=FALSE----------------------------------------------------
fnFs <- sort(list.files(path, pattern = "_R1.trimmed.fastq"))
fnRs <- sort(list.files(path, pattern = "_R2.trimmed.fastq"))


#### ---- split_nameX, eval=FALSE---------------------------------------------------
sample.names <- sapply(strsplit(fnFs, "_"), `[`, 1)
fnFs <-file.path(path, fnFs)
fnRs <-file.path(path, fnRs)


#### ---- plot_qscoresX, layout="l-body-outset", warning=FALSE, fig.height=3, eval=FALSE----
p1 <- plotQualityProfile(fnFs[1:8], aggregate = TRUE)
p2 <- plotQualityProfile(fnRs[1:8], aggregate = TRUE)

p3 <- grid.arrange(p1, p2, nrow = 1)
ggsave("plot_qscores_compare.png", p3, width = 7, height = 3)


#### ---- dummy_2X, echo=FALSE, layout="l-body-outset", warning=FALSE, fig.height=2----
knitr::include_graphics("figures/16s-dada2/plot_qscores_compare.png")


#### ---- move_files, eval=FALSE----------------------------------------------------
filt_path <- file.path(path, "filtered")
filtFs <- file.path(filt_path, paste0(sample.names, "_F_filt.fastq.gz"))
filtRs <- file.path(filt_path, paste0(sample.names, "_R_filt.fastq.gz"))


#### ---- filterX, eval=FALSE-------------------------------------------------------
out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs,
                     truncLen=c(260,150), maxN=0,
                     maxEE=c(3,3), truncQ=2,
                     rm.phix=TRUE, compress=TRUE,
                     multithread = 20)


#### ---- filter_table, echo=FALSE, eval=FALSE--------------------------------------
datatable(out, width = "80%",
          extensions = 'Buttons', options = list(
            scrollX = TRUE,
            dom = 'Brti',
            buttons = c('copy', 'csv', 'excel'),
            pageLength = 8
            )
          )


#### ---- dummy_4X, echo=FALSE------------------------------------------------------
out_T <- read.table("tables/16s-dada2/out_T.txt",
                       header = TRUE)
datatable(out_T, width = "80%",
          extensions = 'Buttons', options = list(
            scrollX = TRUE,
            dom = 'Brti',
            buttons = c('copy', 'csv', 'excel'),
            pageLength = 8
            )
          )


#### ---- learn_errors_forwardX, eval=FALSE-----------------------------------------
errF <- learnErrors(filtFs, multithread = 20)


#### ---- learn_errors_reverseX, eval=FALSE-----------------------------------------
errR <- learnErrors(filtRs, multithread = 20)


#### ---- plot_errorFX, warning=FALSE, layout="l-body-outset", eval=FALSE-----------
plotErrors(errF, nominalQ = TRUE)


#### ----  echo=FALSE, eval=FALSE---------------------------------------------------
p4 <- plotErrors(errF, nominalQ = TRUE)
ggsave("plot_errorF_1.png", p4, width = 7, height = 5)
ggsave("plot_errorF_2.png", p4)


#### ---- dummy_6X, echo=FALSE, layout="l-body-outset"------------------------------
knitr::include_graphics("figures/16s-dada2/plot_errorF_1.png")


#### ---- plot_errorRX, warning=FALSE, layout="l-body-outset", eval=FALSE-----------
plotErrors(errR, nominalQ=TRUE)


#### ----  echo=FALSE, eval=FALSE---------------------------------------------------
p5 <- plotErrors(errR, nominalQ = TRUE)
ggsave("plot_errorR_1.png", p5, width = 7, height = 5)
ggsave("plot_errorR_2.png", p5)


#### ---- dummy_7RX, echo=FALSE, layout="l-body-outset"-----------------------------
knitr::include_graphics("figures/16s-dada2/plot_errorR_1.png")


#### ----  echo=FALSE---------------------------------------------------------------
TO run and cache the derepFastq you must set
`cache.lazy=FALSE`
For both code blocks


#### ---- dereplicate_reads_F, eval=FALSE-------------------------------------------
derepFs <- derepFastq(filtFs)
names(derepFs) <- sample.names


#### ---- dereplicate_reads_R, eval=FALSE-------------------------------------------
derepRs <- derepFastq(filtRs)
names(derepRs) <- sample.names


#### ---- run_dada2_forwardX1, eval=FALSE-------------------------------------------
dadaFs <- dada(derepFs, err = errF, multithread = 20)


#### ---- run_dada2_reverseX1, eval=FALSE-------------------------------------------
dadaRs <- dada(derepRs, err = errR, multithread = TRUE)


#### ---- inspect_f, eval=FALSE-----------------------------------------------------
dadaFs[[1]]


#### ---- inspect_r, eval=FALSE-----------------------------------------------------
dadaRs[[1]]


#### ---- merge_paired_reads, eval=FALSE--------------------------------------------
mergers <- mergePairs(dadaFs, derepFs, dadaRs, derepRs)


#### ---- head_file, eval = FALSE, echo = FALSE-------------------------------------
head(mergers[[1]])


#### ---- seq_tableX, eval=FALSE----------------------------------------------------
seqtab <- makeSequenceTable(mergers)
dim(seqtab)


#### ---- echo=FALSE----------------------------------------------------------------
c(8, 4132)


#### ---- seq_table2X, eval=FALSE---------------------------------------------------
table(nchar(getSequences(seqtab)))


#### ---- trim_lengthX, eval=FALSE--------------------------------------------------
seqtab.2 <- seqtab[,nchar(colnames(seqtab)) %in% seq(360,380)]
dim(seqtab.2)


#### ---- dummy_14X, echo=FALSE-----------------------------------------------------
c(8, 4119)


#### ---- trim_length2X,  eval=FALSE------------------------------------------------
table(nchar(getSequences(seqtab.2)))


#### ---- chimera_on_ind_runs, eval=FALSE-------------------------------------------
seqtab.2.nochim <- removeBimeraDenovo(seqtab.2,
                                      method="consensus",
                                      multithread = 20)
dim(seqtab.2.nochim)


#### ---- dummy_16, echo=FALSE------------------------------------------------------
c(8, 1424)


#### ---- chimera_on_ind_runs2, eval=FALSE------------------------------------------
sum(seqtab.2.nochim)/sum(seqtab.2)


#### ---- dummy_17, echo=FALSE------------------------------------------------------
0.874367


#### ---- build_table_to_track_reads, eval=FALSE------------------------------------
getN <- function(x) sum(getUniques(x))
track <- cbind(out,
               sapply(dadaFs, getN),
               sapply(dadaRs, getN),
               sapply(mergers, getN),
               rowSums(seqtab.2.nochim))
colnames(track) <- c("input", "filtered", "denoisedF",
                     "denoisedR", "merged", "nonchim")
rownames(track) <- sample.names


#### ---- dummy_44X, echo=FALSE, layout="l-body-outset"-----------------------------
sum_run <- read.table("tables/16s-dada2/read_changes.txt",
                       header = TRUE)
datatable(sum_run, width = "80%",
          extensions = 'Buttons', options = list(
            scrollX = TRUE,
            dom = 'Brti',
            buttons = c('copy', 'csv', 'excel'),
            pageLength = 8
            )
          )


#### ---- track_changes, eval=FALSE, echo=FALSE-------------------------------------
write.table(track, "tables/16s-dada2/read_changes.txt",
            sep = "\t", quote = FALSE, col.names=NA)


#### ---- saveRDS, eval=FALSE-------------------------------------------------------
saveRDS(seqtab.2.nochim, "seqtab.2.nochim.rds")


#### ---- read_combo, eval=FALSE----------------------------------------------------
remove(list = ls())
seqtab <- readRDS("seqtab.2.nochim.rds")


#### ---- assign_tax_silva, eval = FALSE--------------------------------------------
tax_silva <- assignTaxonomy(
  seqtab, "../taxonomy_databases/silva_nr_v138_train_set.fa.gz",
  multithread = TRUE)


#### ---- save_image, eval=FALSE----------------------------------------------------
save.image("rdata/16s-dada2/water_pipeline.rdata")

#############################
##
## No 2. Data Preparation
##
#############################


#### ---- setup, include=FALSE------------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
set.seed(0199)
library(phyloseq); packageVersion("phyloseq")
library(DT)
library(ggplot2)
library(Biostrings); packageVersion("Biostrings")
library(dplyr)
library(microbiome)
library(tidyverse)
options(scipen=999)
knitr::opts_current$get(c(
  "cache",
  "cache.path",
  "cache.rebuild",
  "dependson",
  "autodep"
))


#### ----  echo=FALSE---------------------------------------------------------------
### NOTE: this should only be run AFTER the workflow is finished.
### This image is created at the END of this workflow and now read back in.
### Basically, this locks everything in place so nothing changes. Only code
### chunkc to display figures and tables are evaluated. Everything else is
### turned off. To actually run this you need to set this chunk to
### eval=FALSE and the rest to eval=TRUE
load("rdata/16s-data-prep/16s-data_prep.rdata")


#### ---- sample_table1, echo=FALSE, eval=FALSE-------------------------------------
joined_tab <- read.table(
  "tables/16s-data-prep/read_changes.txt",
  header = TRUE, sep = "\t")

joined_tab_percent <- joined_tab
percent_change <- 1-(joined_tab$nonchim/joined_tab$input) %>%
  round(digits = 3)
joined_tab_percent$Change <- as.numeric(sprintf("%.3f", percent_change))
colnames(joined_tab_percent) <- c("Sample<br/>ID", "Type", "Site",
                                  "input", "filter", "denoiseF", "denoiseR",
                                  "merged", "nochim", "Change")


#### ---- samp-summary1, echo=FALSE, layout="l-page"--------------------------------
NOTE. For some reason this datatable was being given the same
elementId as the first table. So I had to add a 20 character
elementId https://www.random.org/strings/
datatable(joined_tab_percent, width = "100%", escape = FALSE,
          rownames = FALSE, filter = 'top',
          caption = htmltools::tags$caption(
            style = 'caption-side: bottom; text-align: left;',
            'Table: ', htmltools::em('Tracking read changes
            through DADA2 workflow. Use the buttons to navigate through
            the table or download a copy. Table may scroll right. ')),
          elementId = "vwltiu9qj77sfy3a07we",
          extensions = 'Buttons', options = list(
            scrollX = TRUE,
            dom = 'Brti',
            buttons = c('copy', 'csv', 'excel'),
            pageLength = 8
            )
          ) %>%
  DT::formatStyle(columns = colnames(joined_tab_percent), fontSize = '80%')
cutoff <- 2000
remove_sam <- joined_tab[joined_tab$nonchim <= cutoff, ]


#### ---- deliniate_sample_types, eval=FALSE----------------------------------------
load("rdata/16s-dada2/water_pipeline.rdata")
samples.out <- rownames(seqtab)
subject <- sapply(strsplit(samples.out, "[[:digit:]]"), `[`, 1)
# this splits the string at first instance of a digit
sample_name <- substr(samples.out, 1, 999)  # use the whole string for individuals
type <- substr(samples.out, 0, 1)  # use the first two letters for sample typle
site <- substr(samples.out, 2, 3)  # use the next three letters for site
num_samp <- length(unique(sample_name))
num_type <- length(unique(type))
num_sites <- length(unique(site))


#### ---- define_variables----------------------------------------------------------
#define a sample data frame
samdf <- data.frame(SamName = sample_name, TYPE = type, SITE = site)
rownames(samdf) <- samples.out


#### ---- create_ps_object, eval=FALSE----------------------------------------------
# this create the phyloseq object
ps <- phyloseq(otu_table(seqtab, taxa_are_rows = FALSE),
                   sample_data(samdf), tax_table(tax_silva))
tax_table(ps) <- cbind(tax_table(ps),
                           rownames(tax_table(ps)))

# adding unique ASV names
taxa_names(ps) <- paste0("ASV", seq(ntaxa(ps)))
tax_table(ps) <- cbind(tax_table(ps),
                           rownames(tax_table(ps)))
head(taxa_names(ps))


#### ----  echo=FALSE---------------------------------------------------------------
head(taxa_names(ps))


#### ---- calculate_stats_ps, echo=FALSE, eval=FALSE--------------------------------
min_read_ps <- min(readcount(ps))
max_read_ps <- max(readcount(ps))
total_reads_ps <- sum(readcount(ps))
mean_reads_ps <- round(mean(readcount(ps)), digits = 0)
median_reads_ps <- median(readcount(ps))
total_asvs_ps <- ntaxa(ps)
singleton_ps <- tryCatch(ntaxa(rare(ps, detection = 1, prevalence = 0)),
                              error=function(err) NA)
singleton_ps_perc <- tryCatch(round((100*(ntaxa(rare(ps, detection = 1, prevalence = 0)) /
                                   ntaxa(ps))), digits = 3), error=function(err) NA)
sparsity_ps <- round(length(which(abundances(ps) == 0))/length(abundances(ps)),
                     digits = 3)


#### ---- add_ASV_coulmn, eval=FALSE------------------------------------------------
colnames(tax_table(ps)) <- c("Kingdom", "Phylum", "Class", "Order",
    "Family", "Genus", "ASV_SEQ", "ASV_ID")
ps


#### ----  echo=FALSE---------------------------------------------------------------
ps


#### ---- export_seq_tax_tables, eval=FALSE-----------------------------------------
write.table(tax_table(ps),
            "tables/16s-data-prep/full_tax_table.txt",
            sep="\t", quote = FALSE, col.names=NA)
write.table(t(otu_table(ps)),
            "tables/16s-data-prep/full_seq_table.txt",
            sep="\t", quote = FALSE, col.names=NA)


#### ---- export_fasta_full, eval=FALSE---------------------------------------------
# Create fasta file from tax_table
table2format <- tax_table(ps)
#retain only the column with the sequences
table2format_trim <- table2format[, 7]
table2format_trim_df <- data.frame(row.names(table2format_trim),
                                   table2format_trim)
colnames(table2format_trim_df) <- c("ASV_ID", "ASV_SEQ")
#format fasta
table2format_trim_df$ASV_ID <- sub("ASV", ">ASV", table2format_trim_df$ASV_ID)

write.table(table2format_trim_df, "tables/16s-data-prep/full_asv.fasta",
            sep = "\r", col.names = FALSE, row.names = FALSE,
            quote = FALSE, fileEncoding = "UTF-8")


#### ---- remove_specific_taxa, eval=FALSE------------------------------------------
# generate a file with mitochondria ASVs
MT1 <- subset_taxa(ps, Family == "Mitochondria")
MT1 <-  as(tax_table(MT1), "matrix")
MT1 <- MT1[, 8]
MT1df <- as.factor(MT1)
goodTaxa <- setdiff(taxa_names(ps), MT1df)
ps_no_mito <- prune_taxa(goodTaxa, ps)
ps_no_mito


#### ----  echo=FALSE---------------------------------------------------------------
ps_no_mito


#### ---- remove_specific_taxa2, eval=FALSE-----------------------------------------
# generate a file with mitochondria ASVs
CH1 <- subset_taxa(ps_no_mito, Order == "Chloroplast")
CH1 <-  as(tax_table(CH1), "matrix")
CH1 <- CH1[, 8]
CH1df <- as.factor(CH1)
goodTaxa <- setdiff(taxa_names(ps_no_mito), CH1df)
ps_no_chloro <- prune_taxa(goodTaxa, ps_no_mito)
ps_no_chloro


#### ----  echo=FALSE---------------------------------------------------------------
ps_no_chloro


#### ---- remove_specific_taxa3, eval=FALSE, eval=FALSE-----------------------------
# generate a file with mitochondria ASVs
EU1 <- subset_taxa(ps_no_chloro, Kingdom == "Eukaryota")
EU1 <-  as(tax_table(EU1), "matrix")
EU1 <- EU1[, 8]
EU1df <- as.factor(EU1)
goodTaxa <- setdiff(taxa_names(ps_no_chloro), EU1df)
ps_no_euk <- prune_taxa(goodTaxa, ps_no_chloro)
ps_no_euk


#### ---- remove_kingdom_na, eval=FALSE---------------------------------------------
ps_filt <- subset_taxa(ps_no_chloro, !is.na(Kingdom))
ps_filt


#### ----  echo=FALSE---------------------------------------------------------------
ps_filt


#### ---- save_trim_table, eval=FALSE-----------------------------------------------
write.table(tax_table(ps_filt),
            "tables/16s-data-prep/trim_tax_table.txt", sep="\t",
            quote = FALSE, col.names=NA)
write.table(t(otu_table(ps_filt)),
            "tables/16s-data-prep/trim_seq_table.txt", sep="\t",
            quote = FALSE, col.names=NA)
write.table(sample_data(ps_filt),
            "tables/16s-data-prep/sample_table.txt",
            sep="\t", quote = FALSE, col.names=NA)


#### ---- export_fasta_water, eval=FALSE--------------------------------------------
# Create fasta file from tax_table
table2format <- tax_table(ps_filt)
#retain only the column with the sequences
table2format_trim <- table2format[, 7]
table2format_trim_df <- data.frame(row.names(table2format_trim),
                                   table2format_trim)
colnames(table2format_trim_df) <- c("ASV_ID", "ASV_SEQ")
#format fasta
table2format_trim_df$ASV_ID <- sub("ASV", ">ASV", table2format_trim_df$ASV_ID)

write.table(table2format_trim_df, "tables/16s-data-prep/trim_asv.fasta",
            sep = "\r", col.names = FALSE, row.names = FALSE,
            quote = FALSE, fileEncoding = "UTF-8")


#### ---- select_water_samples, echo=FALSE, eval=FALSE------------------------------
###############################################
## This is legacy code for making a ps object##
## for a subset of samples                   ##
###############################################

## Water Samples Phyloseq Object

#The next thing to do is select only water samples and make a separate phyloseq object---since our paper is about the water samples. To do this we must select the samples we *wish to keep*. If you want to change the group of samples, modify the script accordingly.

ps_water <- prune_samples(
  c("WCC0", "WCC1", "WCC2", "WCC3", "WCR0", "WCR1", "WCR2", "WCR3"),
  ps_filt)
ps_water

#Anytime we remove samples we probably lose some ASVs. So we need to get rid of any ASVs that have a total of **0 reads**.
ps_water <- prune_taxa(taxa_sums(ps_water) > 0, ps_water)
ps_water

#So for water samples only there are `r ntaxa(ps_water)` ASVs and `r nsamples(ps_water)` samples. Now, save the various phyloseq data from the water object.


#### ---- save_data_prep_image, echo=TRUE, eval=FALSE-------------------------------
ps_water <- ps_filt
save.image("rdata/16s-data-prep/16s-data_prep.rdata")
saveRDS(otu_table(ps_water), "rdata/16s-data-prep/ps_water-seqtab.rds")
saveRDS(sample_data(ps_water), "rdata/16s-data-prep/ps_water-sample.rds")
saveRDS(tax_table(ps_water), "rdata/16s-data-prep/ps_water-taxtab.rds")

#############################
##
## No 3. Diversity
##
#############################


#### ---- setup, include=FALSE------------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
set.seed(0199)
library(phyloseq); packageVersion("phyloseq")
library(DT)
library(ggplot2)
library(Biostrings); packageVersion("Biostrings")
library(dplyr)
library(microbiome)
library(tidyverse)
library(data.table)
library(plyr)
library(stringr)
require(gdata)
library(labdsv)
library(reshape)

options(scipen=999)
knitr::opts_current$get(c(
  "cache",
  "cache.path",
  "cache.rebuild",
  "dependson",
  "autodep"
))


#### ---- load_water_2, echo=FALSE--------------------------------------------------
### NOTE: this should only be run AFTER the workflow is finished.
### This image is created at the END of this workflow and now read back in.
### Basically, this locks everything in place so nothing changes. Only code
### chunk to display figures and tables are evaluated. Everything else is
### turned off. To actually run this you need to set this chunk to
### eval=FALSE and the rest to eval=TRUE
load("rdata/16s-water/16s-water.rdata")


#### ---- load_water_1, eval=FALSE--------------------------------------------------
remove(list = ls())
sample_d <- readRDS("rdata/16s-data-prep/ps_water-sample.rds")
seqtab <- readRDS("rdata/16s-data-prep/ps_water-seqtab.rds")
taxtab <- readRDS("rdata/16s-data-prep/ps_water-taxtab.rds")
ps_water <- merge_phyloseq(sample_d, seqtab, taxtab)
ps_water_o <- ps_water
ps_water


#### ----  echo=FALSE---------------------------------------------------------------
ps_water


#### ---- save_water_table, eval=FALSE----------------------------------------------
write.table(tax_table(ps_water),
            "tables/16s-water/water_tax_table.txt", sep="\t",
            quote = FALSE, col.names=NA)
write.table(t(otu_table(ps_water)),
            "tables/16s-water/water_seq_table.txt", sep="\t",
            quote = FALSE, col.names=NA)


#### ---- export_fasta_water, eval=FALSE--------------------------------------------
# Create fasta file from tax_table
table2format <- tax_table(ps_water)
#retain only the column with the sequences
table2format_trim <- table2format[, 7]
table2format_trim_df <- data.frame(row.names(table2format_trim),
                                   table2format_trim)
colnames(table2format_trim_df) <- c("ASV_ID", "ASV_SEQ")
#format fasta
table2format_trim_df$ASV_ID <- sub("ASV", ">ASV", table2format_trim_df$ASV_ID)

write.table(table2format_trim_df, "tables/16s-water/water_asv.fasta",
            sep = "\r", col.names = FALSE, row.names = FALSE,
            quote = FALSE, fileEncoding = "UTF-8")


#### ---- add_var_cats, eval=FALSE--------------------------------------------------
sample_data(ps_water)$oxstate <-
  c("normoxic", "normoxic", "normoxic", "normoxic",
    "normoxic", "hypoxic", "hypoxic", "hypoxic")
sample_data(ps_water)$period <-
  c("cc_after", "cc_during", "cc_during", "cc_during",
    "cr_after", "cr_during", "cr_during", "cr_during")


#### ---- calculate_stats_ps_water, echo=FALSE, eval=FALSE--------------------------
### Reads
min_read_ps_water <- min(readcount(ps_water))
max_read_ps_water <- max(readcount(ps_water))
total_reads_ps_water <- sum(readcount(ps_water))
mean_reads_ps_water <- round(mean(readcount(ps_water)), digits = 0)
median_reads_ps_water <- median(readcount(ps_water))

### ASVs
total_asvs_ps_water <- ntaxa(ps_water)
min_asvs_ps_water <- min(estimate_richness(
  ps_water, measures = "Observed"))
max_asvs_ps_water <- max(estimate_richness(
  ps_water, measures = "Observed"))
mean_asvs_ps_water <- formatC(mean(
  estimate_richness(ps_water,
                    measures = "Observed")$Observed),
  digits = 0, format = 'f')
median_asvs_ps_water <- median(
  estimate_richness(ps_water,
                    measures = "Observed")$Observed)
singleton_ps_water <- tryCatch(ntaxa(rare(ps_water,
                                         detection = 1,
                                         prevalence = 0)),
                              error=function(err) NA)
singleton_ps_water_perc <- tryCatch(round((100*(ntaxa(rare(ps_water,
                                                          detection = 1,
                                                          prevalence = 0)) /
                                   ntaxa(ps_water))), digits = 3),
                                   error=function(err) NA)
sparsity_ps_water <- round(length(which(abundances(ps_water) == 0))/length(abundances(ps_water)),
                     digits = 3)


#### ---- sample_summary_table, eval=FALSE------------------------------------------
total_reads <- sample_sums(ps_water)
total_reads <- as.data.frame(total_reads, make.names = TRUE)
total_reads <- total_reads %>% rownames_to_column("Sample_ID")

total_asvs <- estimate_richness(ps_water,
                                measures = c(
                                  "Observed", "Shannon", "InvSimpson"))
total_asvs <- total_asvs %>% rownames_to_column("Sample_ID")
total_asvs$Sample_ID <- gsub('\\.', '-', total_asvs$Sample_ID)


#### ---- sample_summary_table2, eval=FALSE-----------------------------------------
sam_details <- meta(sample_data(ps_water))
rownames(sam_details) <- NULL

colnames(sam_details) <- c("Sample_ID", "Type", "Site", "Oxstate", "Period")

merge_tab <- merge(sam_details, total_reads, by = "Sample_ID")
merge_tab2 <- merge(merge_tab, total_asvs, by = "Sample_ID")
colnames(merge_tab2) <- c("Sample<br/>ID", "Type", "Site", "Oxstate", "Period",
    "total<br/>reads", "total<br/>ASVs", "Shannon", "InvSimpson")


#### ---- sample_summary_table_display, layout="l-body-outset", echo=FALSE----------
datatable(merge_tab2, width = "100%", escape = FALSE,
          rownames = FALSE,
          caption = htmltools::tags$caption(
            style = 'caption-side: bottom; text-align: left;',
            'Table: ', htmltools::em('Total reads & ASVs by sample.')),
          elementId = "mex738zeiysgiy3fysb8",
          extensions = 'Buttons', options = list(
            scrollX = TRUE,
            dom = 'Brti',
            buttons = c('copy', 'csv', 'excel'),
            pageLength = 8,
            lengthMenu = c(5, 10)
            )
          ) %>%
    formatRound(columns=c("Shannon", "InvSimpson"), digits=2) %>%
  formatStyle(columns = colnames(merge_tab2), fontSize = '80%')


#### ---- tax_clean, echo=FALSE, eval=FALSE-----------------------------------------
tax.clean <- data.frame(tax_table(ps_water))
for (i in 1:6){ tax.clean[,i] <- as.character(tax.clean[,i])}
tax.clean[is.na(tax.clean)] <- ""

for (i in 1:nrow(tax.clean)){
    if (tax.clean[i,2] == ""){
        kingdom <- paste("Kingdom_", tax.clean[i,1], sep = "")
        tax.clean[i, 2:6] <- kingdom
    } else if (tax.clean[i,3] == ""){
        phylum <- paste("Phylum_", tax.clean[i,2], sep = "")
        tax.clean[i, 3:6] <- phylum
    } else if (tax.clean[i,4] == ""){
        class <- paste("Class_", tax.clean[i,3], sep = "")
        tax.clean[i, 4:6] <- class
    } else if (tax.clean[i,5] == ""){
        order <- paste("Order_", tax.clean[i,4], sep = "")
        tax.clean[i, 5:6] <- order
    } else if (tax.clean[i,6] == ""){
        tax.clean$Genus[i] <- paste("Family",tax.clean$Family[i], sep = "_")
        }
}
tax_table(ps_water) <- as.matrix(tax.clean)
rank_names(ps_water)
rm(class, order, phylum, kingdom)


#### ----  echo=FALSE---------------------------------------------------------------
rank_names(ps_water)


#### ----  echo=FALSE, eval=FALSE---------------------------------------------------
write.table(tax_table(ps_water),
            "tables/16s-water/water_tax_table-no-na-names.txt", sep="\t",
            quote = FALSE, col.names=NA)
write.table(t(otu_table(ps_water)),
            "tables/16s-water/water_seq_table-no-na-names.txt", sep="\t",
            quote = FALSE, col.names=NA)


#### ---- assign_rank, eval=FALSE---------------------------------------------------
TRANK <- "Class"


#### ---- diversity_table_w1, eval=FALSE--------------------------------------------
tax_asv <- table(tax_table(ps_water)[, TRANK],
                 exclude = NULL, dnn = "Taxa")
tax_asv <- as.data.frame(tax_asv, make.names = TRUE, stringsAsFactors=FALSE)

tax_reads <- factor(tax_table(ps_water)[, TRANK])
tax_reads <- apply(otu_table(ps_water), MARGIN = 1, function(x)
{
    tapply(x, INDEX = tax_reads, FUN = sum, na.rm = TRUE, simplify = TRUE)
})
tax_reads <- as.data.frame(tax_reads, make.names = TRUE)
tax_reads <- cbind(tax_reads, reads = rowSums(tax_reads))
tax_reads <- tax_reads[9]
tax_reads <- setDT(tax_reads, keep.rownames = TRUE)[]


#### ---- diversity_table_w2, eval=FALSE--------------------------------------------
taxa_read_asv_tab <- merge(tax_reads, tax_asv, by.x = "rn", by.y = "Taxa")
top_reads <- top_n(taxa_read_asv_tab, n = 8, wt = reads)
top_asvs <- top_n(taxa_read_asv_tab, n = 8, wt = Freq)

names(taxa_read_asv_tab) <- c("Taxa", "total reads", "total ASVs")


#### ---- diversity_table_w3, echo=FALSE--------------------------------------------
datatable(
  taxa_read_asv_tab, rownames = FALSE, width = "100%",
  colnames = c("Taxa", "total reads", "total ASVs"),
  caption = htmltools::tags$caption(
    style = "caption-side: bottom; text-align: left;",
    "Total reads & ASVs by taxa" ),
  elementId = "9sgooxcmkzw565dtk7or",
  extensions = "Buttons",
  options = list(
    columnDefs = list(list(className = "dt-left", targets = 0)),
    dom = "Blfrtip",
    buttons = c("csv", "copy"),
    scrollX = TRUE, scrollCollapse = TRUE, scrollY=TRUE,
    pageLength = 5,
    scroller=TRUE, lengthMenu = c(5, 10, 25, 60)))

top_reads2 <- tibble::column_to_rownames(top_reads, var = "rn")
top_reads2 <- top_reads2[with(top_reads2, order(-reads)),]

top_asvs2 <- tibble::column_to_rownames(top_asvs, var = "rn")
top_asvs2 <- top_asvs2[with(top_asvs2, order(-Freq)),]


#### ---- assign_rank2, eval=FALSE--------------------------------------------------
TRANK <- "Class"


#### ---- calc_avg2, warning=FALSE, eval=FALSE--------------------------------------
ps_water_AVG <- transform_sample_counts(ps_water, function(x) x/sum(x))
ps_water_ox <- merge_samples(ps_water, "oxstate")
SD_BAR_w <- merge_samples(sample_data(ps_water_AVG), "oxstate")


#### ---- calc_avg3, warning=FALSE, eval=FALSE--------------------------------------
mdata_phy_w <- tax_glom(ps_water_ox, taxrank = TRANK, NArm = FALSE)
mdata_phyrel_w <- transform_sample_counts(mdata_phy_w, function(x) x/sum(x))
meltd_w <- psmelt(mdata_phyrel_w)
meltd_w[[TRANK]] <- as.character(meltd_w[[TRANK]])


#### ---- calc_avg4, warning=FALSE, eval=FALSE--------------------------------------
means_w <- ddply(meltd_w, ~get(TRANK), function(x) c(mean = mean(x$Abundance)))
colnames(means_w) <- c(TRANK, "mean")
means_w$mean <- round(means_w$mean, digits = 5)
taxa_means_w <- means_w[order(-means_w$mean), ]  # this orders in decending fashion
taxa_means_w <- format(taxa_means_w, scientific = FALSE)  # ditch the sci notation


#### ---- calc_avg5, warning=FALSE, echo=FALSE--------------------------------------
datatable(
  taxa_means_w, rownames = FALSE, width = "100%",
  colnames = c(TRANK, "mean"),
  caption = htmltools::tags$caption(
    style = "caption-side: bottom; text-align: left;",
    "Relative abundance by taxonomic rank. "),
  elementId = "t9xg4oo3fbjqjqu1ogjm",
  extensions = "Buttons",
  options = list(columnDefs = list(list(
    className = "dt-left", targets = "_all")),
    dom = "Blfrtip", buttons = c("csv", "copy"),
    scrollX = TRUE, scrollCollapse = TRUE, scrollY=TRUE,
    pageLength = 10,
    scroller=TRUE, lengthMenu = c(10, 25, 60)))


#### ---- calc_avg6, warning=FALSE, eval=FALSE--------------------------------------
TAXAN <- 8
top_perc_w <- top_n(taxa_means_w, n = TAXAN, wt = mean)
top_perc_w$mean <- round(as.numeric(top_perc_w$mean), digits = 5)
min_top_perc_w <- round(as.numeric(min(top_perc_w$mean)), digits = 5)
top_perc_w_list <- top_perc_w[,1]


#### ---- make_other_n_v_h, eval=FALSE----------------------------------------------
Other_w <- means_w[means_w$mean < min_top_perc_w, ][[TRANK]]
meltd_w[meltd_w[[TRANK]] %in% Other_w, ][[TRANK]] <- "Other"
samp_names_w <- aggregate(meltd_w$Abundance, by = list(meltd_w$Sample), FUN = sum)[, 1]
.e_w <- environment()
meltd_w[, TRANK] <- factor(meltd_w[, TRANK], sort(unique(meltd_w[, TRANK])))
meltd_w1 <- meltd_w[order(meltd_w[, TRANK]), ]

target <- c("Cyanobacteriia", "Alphaproteobacteria",
            "Bacteroidia", "Gammaproteobacteria",
            "Thermoplasmata", "Phylum_Marinimicrobia_(SAR406_clade)",
            "Acidimicrobiia", "Campylobacteria", "Other")
meltd_w1[[TRANK]] <- reorder.factor(meltd_w1[[TRANK]], new.order = target)


#### ---- make_fig_n_v_h, echo=FALSE, eval=FALSE------------------------------------
family_pal <- c("#009E73", #1
                "#56B4E9", #2
                "#CC79A7", #3
                "#0072B2", #4
                "#D55E00", #5
                "#B6DBFF", #6
                "#E69F00", #7
                "#F0E442", #8
                "#323232", #9
                "#7F7F7F")

fig_w <- ggplot(meltd_w1,
         aes_string(x = "Sample", y = "Abundance",
                    fill = TRANK, show.legend = FALSE),
         ordered = TRUE, xlab = "x-axis label", ylab = "y-axis label")

fig_w <- fig_w + geom_bar(stat = "identity",
                 position = position_stack(reverse = TRUE),
                 width = 0.95) + coord_flip() +
                 theme(aspect.ratio = 1/4) + theme(legend.position = "bottom")
fig_w <- fig_w + scale_fill_manual(values = family_pal)
fig_w <- fig_w + labs(x = "Environment",
                      y = "Relative abundance (% total reads)")

fig_w <- fig_w + theme(plot.margin = margin(0, 0.1, 0, 0.1, unit = "cm")) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.45, vjust = 1)) +
  theme(legend.key = element_rect(colour = "black"))

fig_w <- fig_w + guides(
  fill = guide_legend(override.aes = list(colour = NULL),
                      reverse = FALSE,
                      title.position = "top", nrow = 3))

fig_w <- fig_w + theme(axis.line = element_line(colour = "black"),
                 panel.grid.major = element_blank(),
                 panel.grid.minor = element_blank(),
                 panel.border = element_rect(colour = "black",
                                             fill = NA, size = 1))
fig_w
#ggsave("oxstate_by_taxa.png")
#invisible(dev.off())


#### ----  echo=FALSE, warning=FALSE, fig.height=2----------------------------------
knitr::include_graphics("figures/16s-water/bar-norm-v-hypox.png")


#### ---- hyp_v_norm_agg, eval=FALSE------------------------------------------------
hyp_v_norm <- aggregate(meltd_w1$Abundance,
                 by = list(Rank = meltd_w1[[TRANK]],
                           Sample = meltd_w1$Sample),
                 FUN = sum)
hyp_v_norm$x <- round(hyp_v_norm$x, digits = 4)
hyp_v_norm <- spread(hyp_v_norm, Sample, x)


#### ---- sample_summary_table_display2, echo=FALSE---------------------------------
datatable(hyp_v_norm, width = "100%", escape = FALSE,
          rownames = FALSE,
          caption = htmltools::tags$caption(
            style = 'caption-side: bottom; text-align: left;',
            'Table: ', htmltools::em('Relative abundance (reads) of normoxic
                                     vs hypoxic samples by dominant taxa.')),
          elementId = "wewuidn73q2ceico4vyw",
          extensions = 'Buttons', options = list(
            scrollX = TRUE,
            dom = 'Brti',
            buttons = c('copy', 'csv', 'excel'),
            pageLength = 9
            )
          ) #%>%
#    formatRound(columns=c("Shannon", "InvSimpson"), digits=2) %>%
#  formatStyle(columns = colnames(merge_tab2), fontSize = '80%')


#### ---- assign_rankB, echo=FALSE, eval=FALSE--------------------------------------
TRANK <- "Class"


#### ---- calc_avg2B, warning=FALSE, echo=FALSE, eval=FALSE-------------------------
ps_water_tm <- merge_samples(ps_water, "period")
SD_BAR_tm <- merge_samples(sample_data(ps_water_AVG), "period")


#### ---- calc_avg3B, warning=FALSE, echo=FALSE, eval=FALSE-------------------------
mdata_phy_tm <- tax_glom(ps_water_tm, taxrank = TRANK, NArm = FALSE)
mdata_phyrel_tm <- transform_sample_counts(mdata_phy_tm, function(x) x/sum(x))
meltd_tm <- psmelt(mdata_phyrel_tm)
meltd_tm[[TRANK]] <- as.character(meltd_tm[[TRANK]])


#### ---- calc_avg4B, warning=FALSE, echo=FALSE, eval=FALSE-------------------------
means_tm <- ddply(meltd_tm, ~get(TRANK), function(x) c(mean = mean(x$Abundance)))
colnames(means_tm) <- c(TRANK, "mean")
means_tm$mean <- round(means_tm$mean, digits = 5)
taxa_means_tm <- means_tm[order(-means_tm$mean), ]  # this orders in decending fashion
taxa_means_tm <- format(taxa_means_tm, scientific = FALSE)  # ditch the sci notation


#### ---- calc_avg5B, warning=FALSE, echo=FALSE-------------------------------------
datatable(
  taxa_means_tm, rownames = FALSE, width = "100%",
  colnames = c(TRANK, "mean"),
  caption = htmltools::tags$caption(
    style = "caption-side: bottom; text-align: left;",
    "Relative abundance by taxonomic rank. "),
  elementId = "zswbg7x7f8eqj526xuox",
  extensions = "Buttons",
  options = list(columnDefs = list(list(
    className = "dt-left", targets = "_all")),
    dom = "Blfrtip", buttons = c("csv", "copy"),
    scrollX = TRUE, scrollCollapse = TRUE, scrollY=TRUE,
    pageLength = 9,
    scroller=TRUE, lengthMenu = c(9, 25, 60)))


#### ---- calc_avg6B, warning=FALSE, echo=FALSE, eval=FALSE-------------------------
TAXAN <- 8
top_perc_tm <- top_n(taxa_means_tm, n = TAXAN, wt = mean)
top_perc_tm$mean <- round(as.numeric(top_perc_tm$mean), digits = 5)
min_top_perc_tm <- round(as.numeric(min(top_perc_tm$mean)), digits = 5)
top_perc_tm_list <- top_perc_tm[,1]


#### ---- make_other_d_v_a, echo=FALSE, eval=FALSE----------------------------------
Other_tm <- means_tm[means_tm$mean < min_top_perc_tm, ][[TRANK]]
meltd_tm[meltd_tm[[TRANK]] %in% Other_tm, ][[TRANK]] <- "Other"
samp_names_tm <- aggregate(meltd_tm$Abundance, by = list(meltd_tm$Sample), FUN = sum)[, 1]
.e_tm <- environment()
meltd_tm[, TRANK] <- factor(meltd_tm[, TRANK], sort(unique(meltd_tm[, TRANK])))
meltd_tm1 <- meltd_tm[order(meltd_tm[, TRANK]), ]

target <- c("Cyanobacteriia", "Alphaproteobacteria",
            "Bacteroidia", "Gammaproteobacteria",
            "Thermoplasmata", "Phylum_Marinimicrobia_(SAR406_clade)",
            "Acidimicrobiia", "Campylobacteria", "Other")
meltd_tm1[[TRANK]] <- reorder.factor(meltd_tm1[[TRANK]], new.order=target)


#### ---- make_fig_d_v_a, echo=FALSE, layout="l-body-outset", eval=FALSE------------
family_pal <- c("#009E73", #1
                "#56B4E9", #2
                "#CC79A7", #3
                "#0072B2", #4
                "#D55E00", #5
                "#B6DBFF", #6
                "#E69F00", #7
                "#F0E442", #8
                "#323232", #9
                "#7F7F7F")

fig_tm <- ggplot(meltd_tm1,
         aes_string(x = "Sample", y = "Abundance",
                    fill = TRANK, show.legend = FALSE),
         ordered = TRUE, xlab = "x-axis label", ylab = "y-axis label")

fig_tm <- fig_tm + geom_bar(stat = "identity",
                 position = position_stack(reverse = TRUE),
                 width = 0.95) + coord_flip() +
                 theme(aspect.ratio = 1/4) + theme(legend.position = "bottom")
fig_tm <- fig_tm + scale_fill_manual(values = family_pal)

fig_tm <- fig_tm + labs(x = "Environment",
                      y = "Relative abundance (% total reads)",
                      title = "Abundance of taxa before & after event")

fig_tm <- fig_tm + theme(plot.margin = margin(0, 0.1, 0, 0.1, unit = "cm")) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.45, vjust = 1)) +
  theme(legend.key = element_rect(colour = "black"))

fig_tm <- fig_tm + guides(
  fill = guide_legend(override.aes = list(colour = NULL),
                      reverse = FALSE,
                      title.position = "top", nrow = 3))

fig_tm <- fig_tm + theme(axis.line = element_line(colour = "black"),
                 panel.grid.major = element_blank(),
                 panel.grid.minor = element_blank(),
                 panel.border = element_rect(colour = "black",
                                             fill = NA, size = 1))
fig_tm
#ggsave("period_by_taxa.png")
#invisible(dev.off())


#### ----  echo=FALSE, warning=FALSE, fig.height=2, layout="l-body-outset"----------
knitr::include_graphics("figures/16s-water/bar-before-v-after.png")


#### ---- before_v_after1, echo=FALSE, eval=FALSE-----------------------------------
before_v_after <- aggregate(meltd_tm1$Abundance,
                 by = list(Rank = meltd_tm1[[TRANK]],
                           Sample = meltd_tm1$Sample),
                 FUN = sum)
before_v_after$x <- round(before_v_after$x, digits = 4)
before_v_after <- spread(before_v_after, Sample, x)


#### ---- before_v_after2, echo=FALSE-----------------------------------------------
datatable(before_v_after, width = "100%", escape = FALSE,
          rownames = FALSE, colnames = c("Crawl Caye (after)",
                                         "Crawl Caye (during)",
                                         "Cayo Roldan (after)",
                                         "Cayo Roldan (during)"),
          caption = htmltools::tags$caption(
            style = 'caption-side: bottom; text-align: left;',
            'Table: ', htmltools::em('Relative abundance (reads) of normoxic
                                     vs hypoxic samples by dominant taxa.')),
          elementId = "kqtr8k9xeqyk0sb1s4h4",
          extensions = 'Buttons', options = list(
            scrollX = TRUE,
            dom = 'Brti',
            buttons = c('copy', 'csv', 'excel'),
            pageLength = 9
            )
          ) #%>%
#    formatRound(columns=c("Shannon", "InvSimpson"), digits=2) %>%
#  formatStyle(columns = colnames(merge_tab2), fontSize = '80%')


#### ----  echo=FALSE, eval=FALSE---------------------------------------------------
saveRDS(ps_water, "rdata/16s-water/ps_water.rds")
saveRDS(ps_water_o, "rdata/16s-water/ps_water_o.rds")
saveRDS(ps_water_ox, "rdata/16s-water/ps_water_ox.rds")
save.image("rdata/16s-water/16s-water.rdata")

#############################
##
## No 4. Differentially Abundant ASVs
##
#############################


#### ---- setup, include=FALSE------------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
set.seed(0199)
library(phyloseq); packageVersion("phyloseq")
library(DT)
library(ggplot2)
library(Biostrings); packageVersion("Biostrings")
library(dplyr)
library(microbiome)
library(tidyverse)
library(data.table)
library(plyr)
library(stringr)
require(gdata)
library(labdsv)
library(reshape)
library(metacoder)
library(naniar)

options(scipen=999)
knitr::opts_current$get(c(
  "cache",
  "cache.path",
  "cache.rebuild",
  "dependson",
  "autodep"
))


#### ----  echo=FALSE---------------------------------------------------------------
### NOTE: this should only be run AFTER the workflow is finished.
### This image is created at the END of this workflow and now read back in.
### Basically, this locks everything in place so nothing changes. Only code
### chunkc to display figures and tables are evaluated. Everything else is
### turned off. To actually run this you need to set this chunk to
### eval=FALSE and the rest to eval=TRUE
load("rdata/16s-da-asvs/da_asvs_pipeline.rdata")


#### ----  echo=FALSE, eval=FALSE---------------------------------------------------
remove(list = ls())
ps_water <- readRDS("rdata/16s-water/ps_water.rds")
ps_water_o <- readRDS("rdata/16s-water/ps_water_o.rds")
ps_water_ox <- readRDS("rdata/16s-water/ps_water_ox.rds")
load("rdata/16s-water/16s-water.rdata")


#### ---- idval_analysis, results='hide', eval=FALSE--------------------------------
set.seed(119)
#rm(class, order, phylum, kingdom)
#ls(all.names = TRUE)
water_seq_table <- data.frame(otu_table(ps_water))
# Delete columns when sum == 0 Should not be any
water_seq_table <- water_seq_table[, which(colSums(water_seq_table) != 0)]

water_seq_table <- cbind(oxstate = c("normoxic","normoxic",
                                     "normoxic","normoxic",
                                     "normoxic","hypoxic",
                                     "hypoxic","hypoxic"),
                            water_seq_table)

iva <- indval(water_seq_table[,-1], water_seq_table[,1])
gr <- iva$maxcls[iva$pval <= 0.05]
iv <- iva$indcls[iva$pval <= 0.05]
pv <- iva$pval[iva$pval <= 0.05]
fr <- apply(water_seq_table[,-1] > 0, 2, sum)[iva$pval <= 0.05]
indvalsummary <- data.frame(group = gr, indval = iv,
                            pvalue = pv, freq = fr)
indvalsummary <- indvalsummary[order(indvalsummary$group,
                                     -indvalsummary$indval),]
water_tax_table <- data.frame(tax_table(ps_water))
indvalsummary_tax <- merge(indvalsummary, water_tax_table,
                           by = "row.names",
                           all = TRUE)
indvalsummary_tax <- indvalsummary_tax[!(is.na(indvalsummary_tax$group)),]

lapply(indvalsummary_tax, class)
class(indvalsummary_tax$group) = "character"
indvalsummary_tax$group <- ifelse(indvalsummary_tax$group == "1",
                                  as.character("hypoxic"),
                                  as.character("normoxic"))


#### ---- idval_tables, echo=FALSE, eval=FALSE--------------------------------------
water_seq_table_t <- water_seq_table
water_seq_table_t[,1] <- NULL
water_seq_table_t <- as.data.frame(t(water_seq_table_t))
water_seq_table_t$reads <- rowSums(water_seq_table_t)
water_seq_table_t$norm <- rowSums(water_seq_table_t[1:5])
water_seq_table_t$hypo <- rowSums(water_seq_table_t[6:8])
water_seq_table_t[1:8] <- NULL
water_seq_table_t <- tibble::rownames_to_column(water_seq_table_t, "ASV")

names(indvalsummary_tax)[1] <- "ASV"
full_indval_sum <- merge(water_seq_table_t, indvalsummary_tax,
                           by = "ASV",
                           all = FALSE)

indvalsummary <- tibble::rownames_to_column(indvalsummary, "ASV")
write.table(full_indval_sum, "tables/16s-da-asvs/indval_tax.txt",
            row.names = FALSE, sep = "\t", quote = FALSE)
write.table(indvalsummary, "tables/16s-da-asvs/indval_summary.txt",
            row.names = FALSE, sep = "\t", quote = FALSE)


#### ---- scg_tax, echo=FALSE, layout="l-page"--------------------------------------
full_indval_sum <- read.table("tables/16s-da-asvs/indval_tax.txt", header = TRUE)
full_indval_sum_noseq <- full_indval_sum
full_indval_sum_noseq[,c(15:16)] <- NULL
datatable(colnames = c("ASV", "reads", "norm", "hypo",
                       "group", "indval", "pval", "freq",
                       "Kingdom", "Phylum", "Class", "Order",
                       "Family", "Genus"),
  full_indval_sum_noseq, rownames = FALSE, autoHideNavigation = TRUE,
  width = "100%", elementId = "twykw9v4dzmp1lymga92",
  caption = htmltools::tags$caption(
    style = "caption-side: bottom; text-align: left;",
    "Differentially Abundant ASVs assessed by
    Indicator Species Analysis (ISA)."),
  extensions = c("Buttons", "FixedColumns"),
  options = list(autoWidth = TRUE,
    columnDefs = list(list(className = "dt-left", targets = 0)),
    dom = "Blfrtip",
    buttons = list(list(
        extend = 'collection',
        buttons = c('csv', 'excel', 'copy'),
        text = 'Download')),
    pageLength = 10, lengthMenu = c(5, 10, 50, 160),
    scrollX = TRUE, scrollCollapse = FALSE, scrollY=FALSE, paging=TRUE,
    fixedColumns = list(leftColumns = 1, rightColumns = 0))) %>%
    formatRound(columns=c("indval", "pvalue"), digits = 3) %>%
  formatStyle(columns = colnames(full_indval_sum_noseq), fontSize = '80%')


#### ---- export_da_fasta, echo=FALSE, eval=FALSE-----------------------------------
# Create fasta file from tax_table
table2format2 <- full_indval_sum
table2format2_t <- table2format2[table2format2$reads >= 100 ,]
#retain only the column with the sequences
table2format_trim2 <- table2format2[, 16:15]
table2format_trim2_t <- table2format2_t[, 16:15]
#format fasta
table2format_trim2$ASV_ID <- sub("ASV", ">ASV", table2format_trim2$ASV_ID)
table2format_trim2_t$ASV_ID <- sub("ASV", ">ASV", table2format_trim2_t$ASV_ID)
write.table(table2format_trim2, "tables/16s-da-asvs/idval_asv_all.fasta",
            sep = "\n", col.names = FALSE, row.names = FALSE,
            quote = FALSE, fileEncoding = "UTF-8")
write.table(table2format_trim2_t, "tables/16s-da-asvs/idval_asv_100.fasta",
            sep = "\n", col.names = FALSE, row.names = FALSE,
            quote = FALSE, fileEncoding = "UTF-8")


#### ----  echo=FALSE, eval=FALSE---------------------------------------------------
full_tally <- table(indvalsummary_tax$group)
trim_tally <- table(table2format2$group)


#### ----  eval=FALSE---------------------------------------------------------------
ps_water_100 <- prune_taxa(taxa_sums(ps_water) > 100, ps_water)
ps_water_100


#### ----  echo=FALSE---------------------------------------------------------------
ps_water_100


#### ----  eval=FALSE---------------------------------------------------------------
data_tab <- as.data.frame(t(otu_table(ps_water_100)))
data_tab <- data_tab %>% rownames_to_column("Group")
write.table(data_tab, "16s-anvio/data.txt", sep = "\t",
            quote = FALSE, row.names = FALSE)


#### ----  eval=FALSE---------------------------------------------------------------
data_tab2 <- as.data.frame(t(otu_table(ps_water_100)))
total_reads2 <- cbind(data_tab2, total_reads = rowSums(data_tab2))
total_reads2[1:8] <- NULL
tax_tab2 <- as.data.frame(tax_table(ps_water_100))
tax_tab2[7] <- NULL
total_reads2 <- tibble::rownames_to_column(total_reads2, "Group")
tax_tab2 <- tibble::rownames_to_column(tax_tab2, "Group")

merge_tabX <- merge(total_reads2, tax_tab2,
                           by = "Group",
                           all = TRUE)
drop_c <- c("reads", "norm", "hypo", "Kingdom",
            "Phylum", "Class", "Order", "Family", "Genus")
idval2 <- full_indval_sum_noseq[ , !(names(full_indval_sum_noseq) %in% drop_c)]
colnames(idval2)[1:2] <- c("Group", "enriched")

additional_layers <- merge(idval2, merge_tabX,
                           by = "Group",
                           all.y = TRUE)
write.table(additional_layers, "16s-anvio/additional_layers.txt",
            sep = "\t", quote = FALSE, row.names = FALSE, na = "")


#### ----  eval=FALSE---------------------------------------------------------------
additional_view <- merge_tab2
additional_view[2] <- NULL
colnames(additional_view) <- c("id", "site", "oxstate", "period", "no_reads",
    "no_asvs", "Shannon", "InvSimpson")
write.table(additional_view, "16s-anvio/additional_views.txt",
            sep = "\t", quote = FALSE, row.names = FALSE, na = "")


#### ----  eval=FALSE---------------------------------------------------------------
# Make the table
anvio_taxa <- ps_water
glom_rank <- tax_glom(anvio_taxa, taxrank = 'Class')
melt_rank <- psmelt(glom_rank)
melt_rank$Class <- as.character(melt_rank$Class)
rank_abundance <- aggregate(Abundance~Sample+Class, melt_rank, FUN = sum)
rank_abundance <- as.data.frame(cast(rank_abundance, Sample ~ Class))
#rank_abundance <- as.data.frame(rank_abundance)
rank_abundance <- tibble::remove_rownames(rank_abundance)
rank_abundance <- tibble::column_to_rownames(rank_abundance, "Sample")

# Reorder table column by summ
layers <- rank_abundance[,names(sort(colSums(rank_abundance), decreasing = TRUE))]

# Add the prefix
layers <- layers %>% dplyr::rename_all(function(x) paste0("t_class!", x))
layers <- tibble::rownames_to_column (layers, "taxon")

# save the table
write.table(layers, "16s-anvio/tax_layers.txt",
            sep = "\t", quote = FALSE, row.names = FALSE, na = "")


#### ----  eval=FALSE---------------------------------------------------------------
sam_tree <- read_file("16s-anvio/sample.tre")
sam_tree <- gsub("[\r\n]", "", sam_tree)
item_name <- c("euc_ward")
data_type <- c("newick")
data_value <- c(sam_tree)
sam_tre_tab <- data.frame(item_name, data_type, data_value)
library(janitor)
sam_tre_tab %>% remove_empty("rows")
write.table(sam_tre_tab, "16s-anvio/sample_tree_tab.txt",
            sep = "\t", quote = FALSE, row.names = FALSE, na = "")


#### ---- export_fasta_anvio, eval=FALSE--------------------------------------------
# Create fasta file from tax_table
anvio_tab <- tax_table(ps_water_100)
#retain only the column with the sequences
anvio_tab_trim <- anvio_tab[, 7]
anvio_tab_trim_df <- data.frame(row.names(anvio_tab_trim),
                                   anvio_tab_trim)
colnames(anvio_tab_trim_df) <- c("ASV_ID", "ASV_SEQ")
#format fasta
anvio_tab_trim_df$ASV_ID <- sub("ASV", ">ASV", anvio_tab_trim_df$ASV_ID)

write.table(anvio_tab_trim_df, "16s-anvio/anvio.fasta",
            sep = "\r", col.names = FALSE, row.names = FALSE,
            quote = FALSE, fileEncoding = "UTF-8")


#### ----  echo=FALSE, warning=FALSE, fig.height=2----------------------------------
knitr::include_graphics("figures/16s-da-asvs/before.png")


#### ----  echo=FALSE, layout="l-page", warning=FALSE, fig.height=3-----------------
knitr::include_graphics("figures/16s-da-asvs/after.png")


#### ----  echo=FALSE, warning=FALSE, fig.height=2----------------------------------
knitr::include_graphics("figures/16s-da-asvs/cyano_example.png")


#### ----  eval=FALSE---------------------------------------------------------------
ps_water_mc <- ps_water


#### ---- add_var_cats, echo=FALSE, eval=FALSE--------------------------------------
## USE This code if you want to start with NA taxa before rename
## First, we need to start with the original phyloseq object (`ps_water_o`) before we modified it in any way. We will copy it then add in metadata (oxstate and period) as before. This way we can have a phyloseq object specifically for metacoder.
ps_water_mc <- ps_water_o
sample_data(ps_water_mc)$oxstate <-
  c("normoxic", "normoxic", "normoxic", "normoxic",
    "normoxic", "hypoxic", "hypoxic", "hypoxic")
sample_data(ps_water_mc)$period <-
  c("cc_after", "cc_during", "cc_during", "cc_during",
    "cr_after", "cr_during", "cr_during", "cr_during")


#### ----  eval=FALSE---------------------------------------------------------------
ps_water_mc_100 <- prune_taxa(taxa_sums(ps_water_mc) > 100, ps_water_mc)
ps_water_mc_10 <- prune_taxa(taxa_sums(ps_water_mc) > 10, ps_water_mc)
total_reads_ps_water <- sum(readcount(ps_water_mc))
total_reads_ps_water_100 <- sum(readcount(ps_water_mc_100))
total_reads_ps_water_10 <- sum(readcount(ps_water_mc_10))
ps_obj <- ps_water_mc_100


#### ----  eval=FALSE---------------------------------------------------------------
#summarize_phyloseq(ps_water_o)
#summarize_phyloseq(ps_water_mc)
tax_tab1 <- as.data.frame(tax_table(ps_obj))
tax_tab1 <- tibble::rownames_to_column(tax_tab1, "otu_id")
asv_tab1 <- as.data.frame(t(otu_table(ps_obj)))
asv_tab1 <- tibble::rownames_to_column(asv_tab1, "otu_id")
sam_tab1 <- data.frame(sample_data(ps_obj))
sam_tab1[1] <- NULL
da_samp <- tibble::rownames_to_column(sam_tab1, "sample_id")
water_mc <- merge(asv_tab1, tax_tab1, by="otu_id")
water_mc$ASV_SEQ <- NULL


#### ----  echo=FALSE, eval=FALSE---------------------------------------------------
#This code replaces all Genus names with `NA`. Comment out to keep names.
# but it messes things up
#gen_names <- water_mc$Genus
#water_mc <- water_mc %>% replace_with_na_at(.vars = "Genus",
#                                            condition = ~.x %in% gen_names)


#### ----  eval=FALSE---------------------------------------------------------------
obj <- parse_tax_data(water_mc, class_cols = c("Kingdom", "Phylum",
                                             "Class", "Order",
                                             "Family", "Genus", "ASV_ID" ))
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data",
                                       cols = da_samp$sample_id)
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund",
                                   groups = da_samp$oxstate,
                                   cols = da_samp$sample_id)


#### ----  eval=FALSE---------------------------------------------------------------
obj$data$diff_table <- compare_groups(obj,
                                      data = "tax_abund",
                                      cols = da_samp$sample_id,
                                      groups = da_samp$oxstate)


#### ---- echo=FALSE, eval=FALSE----------------------------------------------------
### THIS is optional if you want to remove low abund taxa AFTER running the comparative stats
obj_t <- obj
obj_t$data$tax_data <- zero_low_counts(obj_t, data = "tax_data", min_count = 100, use_total = TRUE)
no_reads <- rowSums(obj_t$data$tax_data[, da_samp$sample_id]) == 0
sum(no_reads)
obj_t <- filter_obs(obj_t, data = "tax_data", ! no_reads, drop_taxa = TRUE)


#### ---- echo=FALSE, eval=FALSE----------------------------------------------------
### THIS is optional if you want to run fdr
obj$data$diff_table$wilcox_p_value <- p.adjust(obj$data$diff_table$wilcox_p_value,
                                               method = "fdr")
range(obj$data$diff_table$wilcox_p_value, finite = TRUE)
range(obj$data$diff_table$log2_median_ratio[is.finite(obj$data$diff_table$log2_median_ratio)])


#### ----  eval=FALSE---------------------------------------------------------------
#set.seed(1999)
obj %>%
  filter_taxa(taxon_names %in% c("Bacteria"), subtaxa = TRUE) %>%
#  filter_taxa(taxon_ranks == "o", supertaxa = TRUE) %>% # subset to the order rank
#  filter_taxa(taxon_names %in% c("Proteobacteria", "Bacteroidia", "Archaea"), subtaxa = TRUE, invert = TRUE) # to remove taxa
  heat_tree(
          node_label = taxon_names,
          node_size = n_obs,
         # node_size_range = c(0.01, 0.05),
          node_label_size_range = c(0.008, 0.04),
          node_color = log2_median_ratio,
          node_color_interval = c(-5.5, 4),
          edge_color_interval = c(-5.5, 4),
          node_color_trans = "area",
          node_color_range = c("#D55E00", "gray", "#0072B2"),
          node_size_axis_label = "OTU count",
          node_color_axis_label = "Log 2 median ratio",
          layout = "da",
          initial_layout = "re",
          overlap_avoidance = 2,
          output_file = "figures/16s-da-asvs/differential_heat_tree.png")


#### ----  echo=FALSE, warning=FALSE, fig.height=2, layout='l-body-outset'----------
knitr::include_graphics("figures/16s-da-asvs/differential_heat_tree.png")


#### ---- eval=FALSE----------------------------------------------------------------
## This is for the example above
#set.seed(10)
obj %>%
  filter_taxa(taxon_names %in% c("Cyanobacteria"),
              subtaxa = TRUE) %>%
  heat_tree(node_label = taxon_names,
            node_size = n_obs,
            node_size_range = c(0.02, 0.03),
            node_label_size_range = c(0.02, 0.04),
            node_color = log2_median_ratio,
            node_color_interval = c(-5, 5),
            edge_color_interval = c(-5, 5),
            node_color_range = c("#D55E00", "gray", "#0072B2"),
            tree_label = taxon_names,
            initial_layout = "re", layout = "da",
            node_color_axis_label = "Sum of root reads",
            node_size_axis_label = "Number of OTUs",
            output_file = "figures/16s-da-asvs/cyano_example.svg")


#### ----  eval=FALSE---------------------------------------------------------------
#set.seed(10)
obj %>%
  filter_taxa(taxon_names %in% c("Proteobacteria", "Bacteroidia"),
              subtaxa = TRUE) %>%
  heat_tree(node_label = taxon_names,
            node_size = n_obs,
            node_color = log2_median_ratio,
            node_color_range = c("#D55E00", "gray", "#0072B2"),
            node_color_interval = c(-3, 3),
            tree_label = taxon_names,
            initial_layout = "re", layout = "da",
            node_color_axis_label = "Sum of root reads",
            node_size_axis_label = "Number of OTUs",
            output_file = "figures/16s-da-asvs/by_major_taxa.png")


#### ----  echo=FALSE, warning=FALSE, fig.height=2, layout='l-body-outset'----------
knitr::include_graphics("figures/16s-da-asvs/by_major_taxa.png")


#### ----  eval=FALSE---------------------------------------------------------------
set.seed(1)
obj$data$diff_table2 <- compare_groups(obj,
                                      data = "tax_abund",
                                      cols = da_samp$sample_id, # What columns of sample data to use
                                      groups = da_samp$period) # What category each sample is assigned to
heat_tree_matrix(obj,
                 data = "diff_table2",
                 node_size = n_obs, # n_obs is a function that calculates, in this case, the number of OTUs per taxon
                 node_label = taxon_names,
                 node_color = log2_median_ratio, # A column from `obj$data$diff_table`
                 node_color_range = diverging_palette(), # The built-in palette for diverging data
                 node_color_trans = "linear", # The default is scaled by circle area
                 node_color_interval = c(-3, 3), # The range of `log2_median_ratio` to display
                 edge_color_interval = c(-3, 3), # The range of `log2_median_ratio` to display
                 node_size_axis_label = "Number of OTUs",
                 node_color_axis_label = "Log2 ratio median proportions",
                 layout = "davidson-harel", # The primary layout algorithm
                 initial_layout = "reingold-tilford", # The layout algorithm that initializes node locations
                 output_file = "figures/16s-da-asvs/differential_heat_tree2.png") # Saves the plot as a png file


#### ----  echo=FALSE, warning=FALSE, fig.height=2, layout='l-body-outset'----------
knitr::include_graphics("figures/16s-da-asvs/differential_heat_tree2.png")


#### ----  echo=FALSE, eval=FALSE---------------------------------------------------
save.image("rdata/16s-da-asvs/da_asvs_pipeline.rdata")


#### ---- calc_avg2, warning=FALSE, echo=FALSE, eval=FALSE--------------------------
### Script for DESeq NOT FINISHED
library(DESeq2)
ds2 <- phyloseq_to_deseq2(ps_water, ~ oxstate)
dds <- DESeq(ds2)
res <- results(dds)
deseq.results <- as.data.frame(res)
df <- deseq.results
df$taxon <- rownames(df)
df <- df %>% arrange(log2FoldChange, padj)
write.table(res, "res.txt")

###########################
ps_water_avg <- transform_sample_counts(ps_water, function(x) x/sum(x))
t(otu_table(ps_water_avg))
pseq.compositional <- microbiome::transform(ps_water, "compositional")
t(otu_table(pseq.compositional))

mergedGP <- merge_samples(ps_water_avg, "oxstate")
SD  <-  merge_samples(sample_data(ps_water_avg), "oxstate")


OTUnames10 = names(sort(taxa_sums(ps_water), TRUE)[1:100])
GP10  = prune_taxa(OTUnames10,  ps_water)
t(otu_table(GP10))
mGP10 = prune_taxa(OTUnames10, mergedGP)
ocean_samples = sample_names(subset(sample_data(GP), SampleType=="Ocean"))

print(ocean_samples)


otu_table(mergedGP)[, hypoxic]

sample_names(mergedGP)

identical(SD, sample_data(mergedGP))
otu.relative <- abundances(ps_water, "compositional")


ps_water_ox <- merge_samples(ps_water_AVG, "oxstate")

SD_BAR_w <- merge_samples(sample_data(ps_water_AVG), "oxstate")
head(otu_table(ps_water_ox))

ox_otu <- as.data.frame(t(otu_table(ps_water_ox)))
ox_tax <- as.data.frame(tax_table(ps_water_ox))
ox_asv_tab <- merge(ox_otu, ox_tax,by = 0, all = TRUE)

ox_asv_tab <- ox_asv_tab[,-1]
ox_asv_tab <- ox_asv_tab[,c(9,1,2,3,4,5,6,7,8)]

transform_sample_counts(ps_water_ox, function(x) x/sum(x))