library(vegan)
library(phytools)
library(MASS)
library(Biostrings)
library(tidyverse)
library(ape)
library(seqinr)
library(phyloseq)
library(ggtree)
library(scales)
library(ggplot2)


###########################################################################################################################
# 
# 								BACTERIA
#
#
############################################################################################################################

# ----------------
# NMDS
# ----------------
# NOTE: the rule lines above must stay commented. A bare run of dashes is
# parsed by R as unary minus operators applied to the next expression (here
# the setwd() call), which stops the script with an error.


# Data input
setwd("W:/Folder/3 Work archive/3 Analysis/Bacteria")
abund <- read.table("table-BI_16S_all-runs_grouped-mcf-f10-noCMU-biom.tsv",
                    header = TRUE, sep = "\t", row.names = 1)
soil <- read.table("metadata_16S_all_grouped.tsv", header = TRUE, sep = "\t")
tax <- read.table("taxonomy.tsv", header = TRUE, sep = "\t", row.names = 1) # taxonomy file

# Keep only the taxonomy rows whose IDs occur in the abundance table, then
# replace the abundance row names by the matching value of the ASV column.
tax2 <- tax[rownames(tax) %in% rownames(abund), ]
rownames(abund) <- tax2$ASV[match(rownames(abund), rownames(tax2))]

# Transpose, then rescale every row so that it sums to 100.
abund <- as.data.frame(t(abund))
abund <- sweep(abund, 1, rowSums(abund), "/")
abund <- sweep(abund, 1, 100, "*")

# Sort both tables by sample identifier.
# NOTE(review): this assumes the sorted row names of `abund` line up
# one-to-one with the sorted soil$sample.id values - confirm.
soil <- soil[order(soil$sample.id), ]
abund <- abund[order(rownames(abund)), ]
setwd("W:/Folder/3 Work archive/3 Analysis/Bacteria/NMDS/norm_data")


# Hellinger transformation of the abundance table (vegan::decostand)
abund_norm <- decostand(abund, method = "hellinger")



# NMDS ordination of the transformed table with the Horn distance

abund.mds <- metaMDS(abund_norm, distance = "horn")

# Sites drawn with one colour per soil and one symbol per year

soil.v <- factor(soil$soil)
year.v <- factor(soil$year)
col.gr <- c("orange", "gray")
pchs <- 16:17

pdf("abund.mds.soil-year.pdf", width = 5, height = 5) # with colors
fig <- ordiplot(abund.mds, type = "none", display = "sites")
# The integer codes of the factors pick the symbol (year) and colour (soil).
points(
  fig, "sites",
  pch = pchs[as.integer(year.v)],
  col = col.gr[as.integer(soil.v)]
)
title(main = "Bacteria")
legend(
  "bottomleft",
  legend = c("2016", "2017", "Lisse", "Vredepeel"),
  pch = c(16, 17, 15, 15),
  col = c("black", "black", "orange", "gray"),
  bty = "n",
  ncol = 1
)
dev.off()

# Significance of soil and of year.
# NOTE(review): these tests use `abund` (percentages), not the Hellinger
# table `abund_norm` that the ordination above is based on - confirm intent.
adonis(vegdist(abund, method = "horn") ~ soil$soil)
anosim(vegdist(abund, method = "horn"), soil$soil)

adonis(vegdist(abund, method = "horn") ~ soil$year)
anosim(vegdist(abund, method = "horn"), soil$year)


# With treatments

# Original palette, kept so that `colors` stays defined for any code that
# still refers to it. The legend below no longer depends on its order.
colors <- c("mistyrose4", "darkgoldenrod1", "light blue", "black", "mediumorchid1", "forestgreen",
            "chocolate1", "plum2", "blue", "lightcoral", "lightgreen", "firebrick1")

# Treatment label -> point colour, in the original drawing order.
# Both spellings of the hair/hoof meal label are listed: this section used
# "haar_hoefmeel" while the CCA plots of the same metadata file use
# "haar/hoefmeel", so one of the two silently selected no points.
treatment_cols <- c(
  "CaNO3"           = "light blue",
  "KeraproSon"      = "lightcoral",
  "KeraproChamplus" = "blue",
  "Biophosphate"    = "darkgoldenrod1",
  "Keratinemengsel" = "lightgreen",
  "GFTcompost"      = "forestgreen",
  "groencompost"    = "chocolate1",
  "fase3eind"       = "mediumorchid1",
  "zaadmeel"        = "firebrick1",
  "biomassavers"    = "mistyrose4",
  "haar_hoefmeel"   = "plum2",
  "haar/hoefmeel"   = "plum2",
  "controle"        = "black"
)

pdf("abund.mds.treatment.pdf")
fig <- ordiplot(abund.mds, type = "none", display = "sites")
for (trt in names(treatment_cols)) {
  points(fig, "sites", soil$treatment %in% trt, pch = 21,
         col = treatment_cols[[trt]], bg = treatment_cols[[trt]])
}
# levels() returns NULL for a character column (what read.table() produces by
# default since R 4.0), which made legend() fail; go through factor() first.
# Colours are looked up by label so they cannot drift from the points above.
treatment_levels <- levels(factor(soil$treatment))
legend("topleft", treatment_levels, pch = 19,
       col = unname(treatment_cols[treatment_levels]), cex = 0.7)
title(main = "Bacteria")
dev.off()

# Significance of treatment (computed on `abund`, as in the original script)
adonis(vegdist(abund, method = "horn") ~ soil$treatment)
anosim(vegdist(abund, method = "horn"), soil$treatment)


ordiplot(abund.mds)



# ----------------------------------------------------------------
# Microbiome structure in correlation with disease variables
# ----------------------------------------------------------------
# NOTE: the rule lines must stay commented. A bare run of dashes is parsed by
# R as unary minus operators applied to the next expression and raises an
# error.

# Search for species in cca with ALL samples.


# Data input
setwd("W:/Folder/3 Work archive/3 Analysis/Bacteria")
abund <- read.table("table-BI_16S_all-runs_grouped-mcf-f10-noCMU-biom.tsv",
                    header = TRUE, sep = "\t", row.names = 1)
soil <- read.table("metadata_16S_all_grouped.tsv", header = TRUE, sep = "\t")
tax <- read.table("taxonomy.tsv", header = TRUE, sep = "\t", row.names = 1) # taxonomy file
tax2 <- tax[rownames(tax) %in% rownames(abund), ]
tax3 <- read.table("taxonomy_table.csv", header = TRUE, sep = ",",
                   na.strings = c("", "NA"), row.names = 1)
# Rename the abundance rows with the matching value of the ASV column.
rownames(abund) <- tax2$ASV[match(rownames(abund), rownames(tax2))]
abund <- as.data.frame(t(abund))
soil <- soil[order(soil$sample.id), ]
abund <- abund[order(rownames(abund)), ]
# Metadata is sorted the same way as `soil` and `abund`.
metadata <- read.table("metadata_soil.txt", header = TRUE)
metadata <- metadata[order(metadata$sample.id), ]
setwd("W:/Folder/3 Work archive/3 Analysis/Bacteria/NMDS/norm_data")


# Remove Vredepeel 2016 data: a row is kept unless it is both Vredepeel and
# 2016. `abund` is filtered with the columns of `soil`, so it must be
# subset BEFORE `soil` itself is reduced, and both tables must have the same
# row order (NOTE(review): not checked here).
abund <- subset(abund, subset = !soil$soil == "Vredepeel" | !soil$year == "2016")
soil <- subset(soil, subset = !soil == "Vredepeel" | !year == "2016")
metadata <- subset(metadata, subset = !metadata$soil == "Vredepeel" | !metadata$year == "2016")

# Square-root transform, column by column; the result is a matrix.
abund_norm <- apply(abund, 2, sqrt)

# Constrained ordination. The call is vegan::cca() although the output files
# are named "rda_...": rs.dis is the constraint, year and soil are partialled
# out through Condition().
# The result is stored under the name `cca`; later calls to cca() still work
# because R skips non-function objects when looking up a function.

cca <- cca(abund_norm ~ rs.dis + Condition(year + soil), metadata)

ordiplot(cca)

# Significance of treatment (computed on the untransformed `abund`)
adonis(vegdist(abund, method = "horn") ~ soil$treatment)
anosim(vegdist(abund, method = "horn"), soil$treatment)


colors_legend <- c("black", "light blue", "forestgreen", "chocolate1", "mistyrose4", "firebrick1",
                   "mediumorchid1", "darkgoldenrod1", "lightcoral", "blue", "plum2", "lightgreen")
names_legend <- c("Control", "Control+N", "Compost-1", "Compost-2", "Biomass",
                  "Terrafit-Biofum", "Mushroom_compost", "Biophosphate",
                  "Kerapro_Son", "Kerapro_slow_release", "Hair_meal", "Keratin_mix")

# Treatment label -> point colour, in the original drawing order.
# "haar_hoefmeel" is accepted next to "haar/hoefmeel" because the NMDS section
# selects the same treatment of the same metadata file under that spelling.
treatment_cols <- c(
  "CaNO3"           = "light blue",
  "KeraproSon"      = "lightcoral",
  "KeraproChamplus" = "blue",
  "Biophosphate"    = "darkgoldenrod1",
  "Keratinemengsel" = "lightgreen",
  "GFTcompost"      = "forestgreen",
  "groencompost"    = "chocolate1",
  "fase3eind"       = "mediumorchid1",
  "zaadmeel"        = "firebrick1",
  "biomassavers"    = "mistyrose4",
  "haar/hoefmeel"   = "plum2",
  "haar_hoefmeel"   = "plum2",
  "controle"        = "black"
)

pdf("rda_No2016Vredepeel_paper.pdf")
fig <- ordiplot(cca, type = "points", display = c("sites", "bp", "cn"))
for (trt in names(treatment_cols)) {
  points(fig, "sites", soil$treatment %in% trt, pch = 21,
         col = treatment_cols[[trt]], bg = treatment_cols[[trt]])
}
#ordiellipse(cca,metadata$treatment, label=F, col=colors, alpha=100, lwd = 2)
title(main = "Bacteria")
legend("topleft", names_legend, pch = 19, col = colors_legend, cex = 0.8, bty = "n")
dev.off()


# Legend for the treatment types (colour i belongs to name i)
colors_legend <- c("light blue", "purple", "darkgoldenrod1", "plum2", "forestgreen", "firebrick1")
names_legend <- c("control", "keratin", "manure", "manure-chitin", "plant", "seeds")

# treatment2 value -> point colour, in drawing order
type_cols <- c(
  "control"       = "light blue",
  "manure-quitin" = "plum2",
  "seeds"         = "firebrick1",
  "keratin"       = "purple",
  "plant"         = "forestgreen",
  "manure"        = "darkgoldenrod1"
)

pdf("rda_No2016Vredepeel_paper_types.pdf")
fig <- ordiplot(cca, type = "points", display = c("sites", "bp", "cn"))
for (grp in names(type_cols)) {
  points(fig, "sites", metadata$treatment2 == grp, pch = 21,
         col = type_cols[[grp]], bg = type_cols[[grp]])
}
#ordiellipse(cca,metadata$treatment, label=F, col=colors, alpha=100, lwd = 2)
title(main = "Bacteria")
legend("topright", names_legend, pch = 19, col = colors_legend, cex = 0.8, bty = "n")
dev.off()

# Default ordination plot of the same model
pdf("cca_plot_species.pdf")
ordiplot(cca)
dev.off()

# Species (ASV) scores on the first two constrained axes; ASVs with a CCA1
# score above 1.4 are kept (associated with fase3eind and keratin treatments)
scores <- vegan::scores(cca, choices = c(1, 2))
sps.scores <- as.data.frame(scores$species)
asv_suppressive <- sps.scores[which(sps.scores$CCA1 > 1.4), , drop = FALSE]
tax_suppressive <- tax3[rownames(tax3) %in% rownames(asv_suppressive), ]
write.csv(tax_suppressive, "ASV_suppressive_from_cca.csv")

# asv_suppressive_k <- subset(sps.scores, sps.scores$CCA1>1.4 & sps.scores$CCA1<1.8)
# tax_suppressive_k <- tax3[ rownames(tax3) %in% rownames(asv_suppressive_k), ] 
# write.csv(tax_suppressive_k, "ASV_suppressive_from_cca_keratin.csv")


# Taxonomy of the selected ASVs, with the ASV name repeated as column `otu`


z <- tax_suppressive
z[["otu"]] <- rownames(z)

# Rename the representative sequences and write them out.
# NOTE(review): names are assigned by position, which presumes the FASTA
# records are in the same order as the rows of `tax2` - confirm.
setwd("W:/Folder/3 Work archive/3 Analysis/Bacteria")
seq <- readDNAStringSet("sequences_rep-seqs-BI_16S_all-runs_grouped-mcf-f10-noCMU.fasta")
names(seq) <- tax2$ASV
writeXStringSet(seq, "seq_ASV_names.fasta")
setwd("W:/Folder/3 Work archive/3 Analysis/Bacteria/NMDS/norm_data")

# Sequences of the selected ASVs only
selected_sequences <- seq[z$otu]
writeXStringSet(selected_sequences, "seq_ASV_suppressive_CCA.fasta")

#align sequences: for that you use Mafft (in Linux terminal) 
#and make a high accurate alignment: mafft --maxiterate 1000 --globalpair seq.fasta > align.fasta
#Keep it as PHYLIP (.PHY) with SeaView. Keep the files.


# Read the PHYLIP alignment produced outside R
alignment <- read.alignment(file = "align_seq_ASV_suppressive_CCA.phy",
                            format = "phylip")

alig.mat <- as.matrix.alignment(alignment) # alignment as a character matrix
seq.dist <- dist.alignment(alignment)      # pairwise distances between aligned sequences
# Neighbor-joining tree, with spaces removed from the tip labels
mytree <- makeLabel(nj(seq.dist), space = "")
plot(mytree)

# Assemble the phyloseq object: abundance table, taxonomy, sample data, tree

# Transpose the abundance matrix and sort its rows, and the taxonomy rows,
# by name
abund_norm <- t(abund_norm)
abund_norm <- abund_norm[order(rownames(abund_norm)), ]
z <- z[order(rownames(z)), ]

# Abundances of the selected (suppressive) ASVs only
abund.suppres <- abund_norm[rownames(abund_norm) %in% rownames(z), ]

z <- as.matrix(z)

mytree # print the tree

# Sample data are indexed by sample id with an "X" prefix
rownames(metadata) <- paste0("X", metadata$sample.id)

OTU <- otu_table(abund.suppres, taxa_are_rows = TRUE)
TAX <- tax_table(z)
sampledata <- sample_data(metadata)

physeq <- phyloseq(OTU, TAX, sampledata, mytree)


# Merge samples per type of treatment (keratin, quitin, manure, plant, etc).
# NOTE(review): the header said "average", but merge_samples() is called with
# fun = sum - confirm which is intended.

X1 <- merge_samples(physeq, group = "treatment2", fun = sum)


# After merging, the sample names are the treatment types: store them back
# as a factor
sample_data(X1)$treatment2 <- factor(sample_names(X1))

# Check that the names are the same
levels(sample_data(X1)$treatment2)
rownames(sample_data(X1))

# Tree of the selected ASVs: tips labelled by Genus, coloured by Order, with
# one point per treatment type sized by abundance.
# scale_color_viridis() belongs to the `viridis` package, which this script
# never loads; ggplot2's own scale_color_viridis_d() is the discrete
# equivalent and needs no extra package.
p <- ggtree(X1, layout = "rectangular") +
  geom_text2(aes(subset = !isTip, label = label), hjust = -1, size = 1, nudge_x = 0.3) +
  geom_tiplab(align = TRUE, linetype = "dotted", linesize = .3, aes(label = Genus, size = 0.8),
              hjust = 0, offset = 0.05) +
  geom_tippoint(aes(x = x + hjust, y = y + hjust, shape = treatment2, size = Abundance),
                na.rm = TRUE) +
  theme(legend.position = "right") +
  scale_color_viridis_d(option = "D", direction = -1, na.value = "grey50") +
  aes(color = Order) +
  labs(title = "Suppressive Bacteria")

# Pass the plot explicitly instead of relying on last_plot().
ggsave("tree_CCA_suppres_samples_Means_Genus_CaNO3_newNames_type_Treatment_ggtree_paper2.pdf",
       plot = p, width = 10, height = 18)



# ------------------------------------------------------------------------------------
#
# cca with 2016 samples: how other soil parameters affect suppressiveness?
#
# ------------------------------------------------------------------------------------
# NOTE: the rule lines must stay commented. A bare run of dashes is parsed by
# R as unary minus operators applied to the next expression and raises an
# error.

# Data input
setwd("W:/Folder/3 Work archive/3 Analysis/Bacteria")
abund <- read.table("table-BI_16S_all-runs_grouped-mcf-f10-noCMU-biom.tsv",
                    header = TRUE, sep = "\t", row.names = 1)
soil <- read.table("metadata_16S_all_grouped.tsv", header = TRUE, sep = "\t")
tax <- read.table("taxonomy.tsv", header = TRUE, sep = "\t", row.names = 1) # taxonomy file
tax2 <- tax[rownames(tax) %in% rownames(abund), ]
tax3 <- read.table("taxonomy_table.csv", header = TRUE, sep = ",",
                   na.strings = c("", "NA"), row.names = 1)
# Rename the abundance rows with the matching value of the ASV column.
rownames(abund) <- tax2$ASV[match(rownames(abund), rownames(tax2))]
abund <- as.data.frame(t(abund))
soil <- soil[order(soil$sample.id), ]
abund <- abund[order(rownames(abund)), ]
# Metadata is sorted the same way as `soil` and `abund`.
metadata <- read.table("metadata_soil.txt", header = TRUE)
metadata <- metadata[order(metadata$sample.id), ]
setwd("W:/Folder/3 Work archive/3 Analysis/Bacteria/NMDS/norm_data")


# Remove Vredepeel 2016 data: a row is kept unless it is both Vredepeel and
# 2016. `abund` is filtered with the columns of `soil`, so it must be
# subset BEFORE `soil` itself is reduced.
abund <- subset(abund, subset = !soil$soil == "Vredepeel" | !soil$year == "2016")
soil <- subset(soil, subset = !soil == "Vredepeel" | !year == "2016")
metadata <- subset(metadata, subset = !metadata$soil == "Vredepeel" | !metadata$year == "2016")

# Square-root transform, column by column; the result is a matrix.
abund_norm <- apply(abund, 2, sqrt)


# The constraints are the parameters found relevant with corr() and lm();
# soil and year are partialled out through Condition(). The term order is
# kept as is because the sequential anova(by = "terms") run later depends
# on it.


cca <- cca(abund_norm ~ N.Levering + Ca.besch + Cu.PAE + P.PAE + Se + Zn.PAE + Co.PAE + Mn + PMN +
             N.Tot + HWC + Mg + CN + pH + rs.dis
           + Condition(soil + year), metadata)

# Legend entries (colour i belongs to name i)
colors_legend <- c("black", "light blue", "forestgreen", "chocolate1", "mistyrose4", "firebrick1",
                   "mediumorchid1", "darkgoldenrod1", "lightcoral", "blue", "plum2", "lightgreen")

names_legend <- c("Control", "Control+N", "Compost-1", "Compost-2", "Biomass",
                  "Terrafit-Biofum", "Mushroom_compost", "Biophosphate",
                  "Kerapro_Son", "Kerapro_slow_release", "Hair_meal", "Keratin_mix")

colors <- c("mistyrose4", "darkgoldenrod1", "light blue", "black", "mediumorchid1", "forestgreen",
            "chocolate1", "plum2", "blue", "lightcoral", "lightgreen", "firebrick1")

# Treatment label -> point colour, in the original drawing order.
# "haar_hoefmeel" is accepted next to "haar/hoefmeel" because the NMDS section
# selects the same treatment of the same metadata file under that spelling.
treatment_cols <- c(
  "CaNO3"           = "light blue",
  "KeraproSon"      = "lightcoral",
  "KeraproChamplus" = "blue",
  "Biophosphate"    = "darkgoldenrod1",
  "Keratinemengsel" = "lightgreen",
  "GFTcompost"      = "forestgreen",
  "groencompost"    = "chocolate1",
  "fase3eind"       = "mediumorchid1",
  "zaadmeel"        = "firebrick1",
  "biomassavers"    = "mistyrose4",
  "haar/hoefmeel"   = "plum2",
  "haar_hoefmeel"   = "plum2",
  "controle"        = "black"
)

pdf("cca_soildata_NoVredepeel2016_newNames_paper.pdf")
fig <- ordiplot(cca, type = "points", display = c("sites", "bp", "cn"), choices = c(1, 2))
for (trt in names(treatment_cols)) {
  points(fig, "sites", soil$treatment %in% trt, pch = 21,
         col = treatment_cols[[trt]], bg = treatment_cols[[trt]])
}
#ordiellipse(rda,metadata$treatment, label=F, col=colors, alpha=100, lwd = 2, choices = c(1, 2))
title(main = "Bacteria")
legend("bottomleft", names_legend, pch = 19, col = colors_legend, cex = 0.7)
dev.off()

# Significance of treatment on the square-root transformed table
adonis(vegdist(abund_norm, method = "horn") ~ soil$treatment)


# Same ordination, sites coloured by treatment type

colors_legend <- c("light blue", "purple", "darkgoldenrod1", "plum2", "forestgreen", "firebrick1")
names_legend <- c("control", "keratin", "manure", "manure-chitin", "plant", "seeds")

# treatment2 value -> point colour, in drawing order
type_cols <- c(
  "control"       = "light blue",
  "manure-quitin" = "plum2",
  "seeds"         = "firebrick1",
  "keratin"       = "purple",
  "plant"         = "forestgreen",
  "manure"        = "darkgoldenrod1"
)

pdf("cca_soildata_NoVredepeel2016_newNames_paper_types.pdf")
fig <- ordiplot(cca, type = "points", display = c("sites", "bp", "cn"))
for (grp in names(type_cols)) {
  points(fig, "sites", metadata$treatment2 == grp, pch = 21,
         col = type_cols[[grp]], bg = type_cols[[grp]])
}
title(main = "Bacteria")
legend("bottomright", names_legend, pch = 19, col = colors_legend, cex = 0.8, bty = "n")
dev.off()


# Significance of treatment type
adonis(vegdist(abund_norm, method = "horn") ~ metadata$treatment2)

# Permutation test of the constrained model, term by term
anov <- anova(cca, by = "terms")




# ------------------------------------------------------------------------------------
#
# Taxa quantification for all ASVs from keratin/chitin (from CCA)
#
# ------------------------------------------------------------------------------------
# NOTE: the rule lines must stay commented. A bare run of dashes is parsed by
# R as unary minus operators applied to the next expression and raises an
# error.

ordiplot(cca)

# Species (ASV) scores on the first two constrained axes; ASVs with a CCA1
# score above 1 are kept (associated with fase3eind and keratin treatments)
scores <- vegan::scores(cca, choices = c(1, 2))
sps.scores <- as.data.frame(scores$species)
asv_suppressive <- subset(sps.scores, sps.scores$CCA1 > 1)
tax_suppressive <- tax3[rownames(tax3) %in% rownames(asv_suppressive), ]
write.csv(tax_suppressive, "ASV_keratin-chitin_from_cca.csv")

library(dplyr)
library(ggplot2)
library(ggpubr)

data <- tax_suppressive

# Denominator of the percentages.
# NOTE(review): presumably the number of selected ASVs at the time the script
# was written - confirm whether nrow(data) should be used instead.
n_asv_reference <- 260

# FAMILY: percentage of ASVs per family.
# The dplyr verbs are namespace-qualified because plyr, attached further down
# in this script, masks summarise(); with plyr's version the grouping is
# ignored and a single row is returned when this section is re-run.
df <- data %>%
  dplyr::group_by(Family) %>%
  dplyr::summarise(counts = (dplyr::n() / n_asv_reference) * 100)
df <- df[order(df$counts), ]
df <- subset(df, subset = !df$Family == "unidentified")

p <- ggplot(df, aes(x = reorder(Family, counts), y = counts)) +
  geom_bar(stat = "identity", fill = "lightgray") + coord_flip() +
  ggtitle("Bacteria: Family") + labs(y = "% num. ASVs", x = "Family", size = 8) + theme_pubclean() +
  theme(axis.text.x = element_text(hjust = 1, size = 9),
        axis.text.y = element_text(size = 9))
ggsave(p, filename = "ASV_keratin-chitin_from_cca_Family_ASVs_number.pdf",
       width = 12, height = 9)




##########################################################################################################################
# 
# 								FUNGI
#
#
##########################################################################################################################


#NOTE: two samples were removed from 'metadata' and 'soil' because they do not appear in the 'abund' file:
#      208-ITS28S.2016.G04.S76 (Lisse-GFTcompost-2016) and 211-ITS28S.2017.H07.S91 (Vredepeel-CaNO3-2017).

# The scripts are the same as those used for the BACTERIA data.






##########################################################################################################################
#
#  Soil parameters correlations
#
##########################################################################################################################


data <- read.table("soil_parameters.txt", header = TRUE, sep = "\t") # only 2016 year available
info <- read.table("soil_descriptions.txt", header = TRUE, sep = "\t")

# Is there a correlation between factors?

library(Hmisc)
library(corrplot)


# Spearman correlations (and p-values) between columns 7 to 45
data2 <- data[, 7:45]
data2 <- as.matrix(data2)

cor <- rcorr(data2, type = "spearman")

col <- colorRampPalette(c("darkorange", "white", "steelblue"))(20)

# Classification -> label colour. "disease" and "plant" share black.
class_cols <- c(
  biological = "forestgreen",
  chemical   = "gold2",
  disease    = "black",
  physical   = "blue2",
  plant      = "black"
)

# One colour per row of `info`, obtained by replacing the classification name
# by its colour.
# NOTE(review): using these as label colours assumes the rows of `info` are in
# the same order as the columns of `data2` - confirm.
info$color <- info$classification
for (cls in names(class_cols)) {
  info$color <- gsub(cls, class_cols[[cls]], info$color)
}

col2 <- info$color

pdf("corr_heatmap_soil_parameters.pdf", width = 6, height = 6)
corrplot(cor$r, type = "upper", method = "circle", p.mat = cor$P,
         sig.level = 0.05, insig = "blank", col = col, tl.col = col2, tl.cex = 0.7)
dev.off()


# For hclust display:

ord <- corrMatOrder(cor$r, order = "hclust") # to keep order of labels in hclust
newcolours <- col2[ord]

# levels() is NULL for a character column (the read.table() default since
# R 4.0), which made legend() fail. The fill colours are looked up by
# classification so they stay correct whichever classes are present (the
# fixed four-colour vector was only right when "disease" was absent).
class_levels <- levels(factor(info$classification))

pdf("corr_heatmap_soil_parameters_hclust.pdf", width = 6, height = 6)
corrplot(cor$r, type = "upper", method = "circle", p.mat = cor$P,
         sig.level = 0.05, insig = "blank", col = col, tl.col = newcolours,
         tl.cex = 0.7, order = "hclust")
legend(0.870823, 25.87265, cex = 0.8, legend = class_levels,
       fill = unname(class_cols[class_levels]),
       border = "white", box.col = "white")
dev.off()

# We used locator() to find coordinates for legend


# ---------------------------------------------------------------------------
#
#  see soil characteristics in different type of soil (Lisse vs Vredepeel)
#
# ---------------------------------------------------------------------------
# NOTE: the rule lines must stay commented. A bare run of dashes is parsed by
# R as unary minus operators applied to the next expression and raises an
# error.

data <- read.table("soil_parameters_new_new_names.csv", header = TRUE, sep = ",")

dataL <- subset(data, subset = data$soil == "Lisse")
dataV <- subset(data, subset = data$soil == "Vredepeel")

# Log-scaled boxplots of columns 7 to 50, one panel per soil.
# NOTE(review): `col2` comes from the correlation section above, which used
# columns 7 to 45; confirm the colours still line up with these columns.
pdf("soil_properties_boxplots.pdf", width = 7, height = 5)
par(mfrow = c(2, 1), cex = 0.7)
boxplot(log(dataL[, 7:50]), main = "Lisse", las = 2, col = col2, boxwex = 0.8)
boxplot(log(dataV[, 7:50]), main = "Vredepeel", las = 2, col = col2, boxwex = 0.8)
dev.off()


  #now with Lisse-Vredepeel in same plot

library(ggplot2)
library(ggpubr)
library(reshape2)

# Control treatment only, columns 24 to 57 without the treatment column
data2 <- subset(data, subset = data$treatment == "Control")
data2 <- data2[, 24:57]
data2 <- subset(data2, select = -c(treatment))

# Long format: one row per (soil, variable) value
data3 <- melt(data2, id.vars = "soil")

my_comparisons <- list(c("Lisse", "Vredepeel"))

pdf("soil_properties_boxplots_together.pdf", height = 10, width = 15)
p <- ggplot(data = data3, aes(x = variable, y = value)) +
  geom_boxplot(aes(fill = soil))
# Explicit print(): a ggplot evaluated at top level is only drawn by
# auto-printing, which does not happen under source(), so the PDF would be
# left empty.
print(p + theme_bw() + facet_wrap(~ variable, scales = "free"))
dev.off()


  #CaNO3 as control (no Vredepeel). With new names for paper

data <- read.table("soil_parameters_new_new_names.csv", header = TRUE, sep = ",")

# Drop the rows that are both Vredepeel and 2016
data <- subset(data, subset = !soil == "Vredepeel" | !year == "2016")

# Control+N (CaNO3) treatment only, then a positional column selection
data2 <- subset(data, subset = data$treatment == "Control+N")
data2 <- data2[, 6:57]
data2 <- data2[-c(2:11, 13:18)]

# Long format: one row per (soil, variable) value
data3 <- melt(data2, id.vars = "soil")

my_comparisons <- list(c("Lisse", "Vredepeel"))

pdf("soil_properties_boxplots_together_CaNO3.pdf", height = 10, width = 15)
p <- ggplot(data = data3, aes(x = variable, y = value)) +
  geom_boxplot(aes(fill = soil))
# Explicit print(): a ggplot evaluated at top level is only drawn by
# auto-printing, which does not happen under source(), so the PDF would be
# left empty.
print(p + theme_bw() + facet_wrap(~ variable, scales = "free"))
dev.off()




# ---------------------------------------------------------------------------
#
#  Lisse vs vredepeel soil properties (only control)
#
# ---------------------------------------------------------------------------
# NOTE: the rule lines must stay commented. A bare run of dashes is parsed by
# R as unary minus operators applied to the next expression and raises an
# error.

data <- read.table("soil_parameters_Control.txt", header = TRUE, sep = "\t")

library(ggplot2)
library(plyr)
library(reshape2)

melted <- melt(data)

# Mean and standard deviation per soil and parameter. The column is called
# `sem` but holds sd(), so the error bars below show the SD.
means <- ddply(melted, c("soil", "parameter"), summarise,
               mean = mean(value, na.rm = TRUE), sem = sd(value, na.rm = TRUE))

pdf("barplot_soils_control.pdf", width = 7, height = 5)
plot <- ggplot(means, aes(x = parameter, y = mean, fill = soil)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("#E69F00", "darkgreen", "#999999")) +
  geom_errorbar(aes(ymin = mean - sem, ymax = mean + sem), position = "dodge") # bars represent SD
# Explicit print(): a ggplot evaluated at top level is only drawn by
# auto-printing, which does not happen under source().
print(plot + scale_y_continuous(trans = "log10") +
        theme(axis.text.x = element_text(angle = 90, hjust = 1)))
dev.off()


library("ggpubr")

# Boxplot of value per parameter, coloured by soil, on a log10 y axis
pdf("boxplot_soil_parameters_control.pdf", width = 10, height = 10)
plot <- ggboxplot(data, x = "parameter", y = "value", color = "soil",
                  palette = c("#00AFBB", "#E7B800"))
# Explicit print(): a ggplot evaluated at top level is only drawn by
# auto-printing, which does not happen under source().
print(plot + coord_trans(y = "log10") +
        theme(axis.text.x = element_text(angle = 90, hjust = 1)))
dev.off()

# leveneTest() comes from the car package, which the script only attached
# further down, after these calls; load it here so the tests can run.
library(car)

# Pre-tests
shapiro.test(data$value) # Not normal data
leveneTest(value ~ soil * parameter, data = data) # homogeneity of variance not good

# ANOVA

# fit = lm(formula = value ~ soil*parameter, data)
# results<-aov(fit)
# anova_results<-TukeyHSD(results)
# write.csv(anova_results$`soil:parameter`, "anova_results_control.csv")
# 

# Two-way ANOVA with interaction, followed by a look at the residuals
anovaR <- aov(value ~ as.factor(parameter) * as.factor(soil), data = data)
res <- anovaR$residuals
hist(res, main = "Histogram of residuals", xlab = "Residuals")
leveneTest(value ~ as.factor(soil) * as.factor(parameter), data = data)
summary(anovaR)

# Tukey post-hoc comparisons; only the interaction table is exported
posthoc <- TukeyHSD(anovaR)
write.csv(posthoc$`as.factor(parameter):as.factor(soil)`, "anova_results_control.csv")

# Correlation plot for the control treatment
library(Hmisc)
library(corrplot)
library(car)

data <- read.table("soil_parameters.txt", header = TRUE, sep = "\t")
# NOTE(review): the label is matched with a trailing space ("Controle ") -
# confirm that this is how it is stored in the file.
data <- subset(data, treatment == "Controle ")

# Spearman correlations between columns 7 to 50
# NOTE(review): the earlier correlation section reads columns 7 to 45 of the
# same file - confirm which range is right.
data2 <- as.matrix(data[, 7:50])

cor <- rcorr(data2, type = "spearman")

corrplot(
  cor$r,
  type = "upper", method = "circle",
  p.mat = cor$P, sig.level = 0.05, insig = "blank",
  tl.cex = 0.7
)



# ------------------------------------------------------------------------------------
#
#  Soil parameters correlations: only year 2016 and ergosterol,PMN, HWC and fungal and bacterial biomass
#
# ------------------------------------------------------------------------------------
# NOTE: the rule lines must stay commented. A bare run of dashes is parsed by
# R as unary minus operators applied to the next expression and raises an
# error.

# NOTE(review): this file is read with sep = "\t" here but with the default
# whitespace separator elsewhere in the script - confirm.
data <- read.table("metadata_soil.txt", header = TRUE, sep = "\t")
# Remove Vredepeel 2016 data
data <- subset(data, subset = !data$soil == "Vredepeel" | !data$year == "2016")
dataV <- subset(data, subset = data$soil == "Vredepeel")
dataL <- subset(data, subset = data$soil == "Lisse")

# Is there a correlation between factors?

library(Hmisc)
library(corrplot)

# Columns 8 to 14 as a numeric matrix
data <- data[, 8:14]
data <- as.matrix(data)

shapiro.test(data) # data are not normal
hist(data[, 7]) # to see distribution of variables. We did with all columns

cor <- rcorr(data, type = "spearman")

col <- colorRampPalette(c("darkorange", "white", "steelblue"))(20)

pdf("corr_heatmap_soil_parameters_2016_final.pdf", width = 5, height = 5)

corrplot(cor$r, type = "upper", method = "circle", p.mat = cor$P,
         sig.level = 0.05, insig = "label_sig", col = col, tl.col = "black", tl.cex = 1)

dev.off()


library(psych)
pdf("scatter_plot_soil_parameters_2016_final.pdf")
pairs.panels(data,
             method = "spearman", # correlation method
             hist.col = "gray",
             density = TRUE,      # show density plots
             ellipses = FALSE,    # no correlation ellipses
             lm = TRUE,
             stars = TRUE,
             cex.cor = 1
)
dev.off()



# Lisse only: columns 8 to 14 as a matrix, Spearman correlation heatmap

dataL <- as.matrix(dataL[, 8:14])

shapiro.test(dataL) # data are not normal
hist(dataL[, 7]) # distribution of one variable; repeated by hand for all columns

cor <- rcorr(dataL, type = "spearman")

col <- colorRampPalette(c("darkorange", "white", "steelblue"))(20)

pdf("corr_heatmap_soil_parameters_2016_final_Lisse.pdf", width = 5, height = 5)

corrplot(
  cor$r,
  type = "upper", method = "circle",
  p.mat = cor$P, sig.level = 0.05, insig = "label_sig",
  col = col, tl.col = "black", tl.cex = 1
)
title(main = "Lisse")
dev.off()


# Vredepeel only: same steps

dataV <- as.matrix(dataV[, 8:14])

shapiro.test(dataV) # data are not normal
hist(dataV[, 7]) # distribution of one variable; repeated by hand for all columns

cor <- rcorr(dataV, type = "spearman")

col <- colorRampPalette(c("darkorange", "white", "steelblue"))(20)

pdf("corr_heatmap_soil_parameters_2016_final_Vredepeel.pdf", width = 5, height = 5)

corrplot(
  cor$r,
  type = "upper", method = "circle",
  p.mat = cor$P, sig.level = 0.05, insig = "label_sig",
  col = col, tl.col = "black", tl.cex = 1
)
title(main = "Vredepeel")
dev.off()



# Heatmap ordered by hierarchical clustering.
# NOTE(review): at this point `cor` holds the Vredepeel-only correlations
# computed just above, and `col2` comes from a different section - confirm
# that this is the intended input.

ord <- corrMatOrder(cor$r, order = "hclust") # to keep order of labels in hclust
newcolours <- col2[ord]

pdf("corr_heatmap_soil_parameters_hclust_2016_new.pdf", width = 6, height = 6)
corrplot(
  cor$r,
  type = "upper", method = "circle",
  p.mat = cor$P, sig.level = 0.05, insig = "blank",
  col = col, tl.col = "black", tl.cex = 1,
  order = "hclust"
)
dev.off()


# ------------------------------------------------------------------------------------
#
# What treatments give more disease?
# ------------------------------------------------------------------------------------
# NOTE: the rule lines must stay commented. A bare run of dashes is parsed by
# R as unary minus operators applied to the next expression and raises an
# error.

data <- read.table("metadata_soil.txt", header = TRUE)

# Remove Vredepeel 2016 data
data <- subset(data, subset = !data$soil == "Vredepeel" | !data$year == "2016")
dataL <- subset(data, subset = data$soil == "Lisse")
dataV <- subset(data, subset = data$soil == "Vredepeel")

#tapply(X = data$rs.dis, INDEX = list(data$treatment), FUN = mean)

# Pairwise t-tests of rs.dis between treatments, Bonferroni-adjusted
x <- pairwise.t.test(data$rs.dis, data$treatment, p.adjust.method = "bonferroni")


write.csv(x$p.value, "rs.dis.stats.csv")


# PLOT:

library(tidyverse)
library(ggplot2)
library(ggpubr)
# melt() and ddply() are used below; attach their packages here as well so the
# section does not depend on an earlier section having been run.
library(reshape2)
library(plyr)

# dplyr::select is spelled out because MASS, also attached by this script,
# exports a select() too.
data <- data %>% dplyr::select(soil, treatment, rs.dis)

melted <- melt(data)

# Mean and standard deviation per treatment. The column is called `sem` but
# holds sd(), so the error bars below show the SD.
means <- ddply(melted, c("treatment", "variable"), summarise,
               mean = mean(value, na.rm = TRUE), sem = sd(value, na.rm = TRUE))

# Explicit print() calls below: a ggplot evaluated at top level is only drawn
# by auto-printing, which does not happen under source(), so the PDFs would
# be left empty.
pdf("barplot_disease_all.pdf", width = 7, height = 5)
plot <- ggplot(means, aes(x = treatment, y = mean, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = mean - sem, ymax = mean + sem), position = "dodge") # bars represent SD
print(plot + theme(axis.text.x = element_text(angle = 90, hjust = 1)))
dev.off()

# boxplot

pdf("boxplot_disease_all.pdf")
bp <- ggplot(melted, aes(x = treatment, y = value, fill = variable)) +
  geom_boxplot() +
  labs(title = "Disease", x = "treatment", y = "Log2 (disease score)")
print(bp + theme_classic() + theme(axis.text.x = element_text(angle = 90)) +
        scale_y_continuous(trans = "log2"))
dev.off()


# boxplot (separate diseases)

meltedRS <- subset(melted, melted$variable == "rs.dis")

pdf("boxplot_RS_disease_all_padj_values.pdf")
bp <- ggplot(meltedRS, aes(x = treatment, y = value, fill = variable)) +
  geom_boxplot(fill = "indianred1") +
  labs(title = "Disease-RS", x = "treatment", y = "disease score")
print(bp + theme_classic() + theme(axis.text.x = element_text(angle = 90)) +
        stat_compare_means(aes(label = ..p.adj..), method = "t.test",
                           ref.group = "CaNO3"))
dev.off()




# Create a boxplot and add p-adj values:


data <- read.table("metadata_soil_new_names.txt", header = TRUE, sep = "\t")


# --------------
# RS disease
# --------------
# NOTE: the rule lines must stay commented. A bare run of dashes is parsed by
# R as unary minus operators applied to the next expression and raises an
# error.

pdf("rs.dis.anova.padj.values_CaNO3control_new_names.pdf", width = 5, height = 5)
p <- ggboxplot(data, x = "treatment", y = "rs.dis", color = "black", fill = "type",
               palette = c("light blue", "mediumpurple1", "gold", "lightpink", "olivedrab3",
                           "indianred1"),
               ggtheme = theme_bw()) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
        axis.text.y = element_text(hjust = 1, size = 12),
        text = element_text(size = 15)) +
  stat_compare_means(method = "anova", label.y = 30, label.x = 5) +
  stat_compare_means(label = "p.signif",
                     ref.group = "Control+N") +
  # plotmath paste() juxtaposes its arguments without a separator, so the
  # space has to be part of the string (the title read "R. solanidisease").
  ggtitle(expression(paste(italic("R. solani"), " disease"))) +
  rremove("grid")
# Explicit print(): the plot is only drawn by auto-printing at top level,
# which does not happen under source().
print(ggpar(p,
            xlab = "Treatment",
            ylab = "Disease spread (cm)"))

dev.off()

# Type II ANOVA of disease spread on treatment type, soil and year
library(car)
my_anova <- aov(rs.dis ~ type * soil * year, data = data)
Anova(my_anova, type = "II")



# ---------------------------
# Biological parameters
# ---------------------------
# NOTE: the rule lines must stay commented. A bare run of dashes is parsed by
# R as unary minus operators applied to the next expression and raises an
# error.


# ALL in one plot with new names

data <- read.table("soil_parameters_new_new_names.csv", header = TRUE, sep = ",")

# Convert year to a factor
data$year <- factor(data$year,
                    levels = c(2016, 2017),
                    labels = c("2016", "2017"))

# Remove Vredepeel 2016 data
data <- subset(data, subset = !data$soil == "Vredepeel" | !data$year == "2016")

# Display order of the treatments.
# NOTE(review): this list has "KeraproSon" while the legends elsewhere in the
# script use "Kerapro_Son"; any label missing from the list becomes NA in the
# factor below - confirm against the file.
order <- c("Control", "Control+N", "Compost-1", "Compost-2", "Biomass",
           "Terrafit-Biofum", "Mushroom_compost", "Biophosphate",
           "KeraproSon", "Kerapro_slow_release", "Hair_meal", "Keratin_mix")


data$treatment <- as.character(data$treatment)
data$treatment <- factor(data$treatment, levels = order)


PMN <- ggboxplot(data, x = "treatment", y = "PMN", fill="treatment2",
                 palette =c("light blue","mediumpurple1","gold", "lightpink","olivedrab3",
                            "indianred1"), ggtheme = theme_bw())+
  theme(axis.text.x = element_blank(), legend.position = "none")+
  stat_compare_means(method = "anova", label.y = 100, label.x = 5)+   
  stat_compare_means(label = "p.signif",
                     ref.group = "Control+N") +
  ggtitle("PMN") +
rremove("grid") 
PMN <- ggpar(PMN, tickslab = T,
      ylab = "mg N / kg soil")

HWC <- ggboxplot(data, x = "treatment", y = "HWC", fill="treatment2", 
                 palette =c("light blue","mediumpurple1","gold", "lightpink","olivedrab3",
                            "indianred1"), ggtheme = theme_bw())+
  theme(axis.text.x = element_blank(), legend.position = "none")+
  stat_compare_means(method = "anova", label.y = 180, label.x = 5)+   
  stat_compare_means(label = "p.signif",
                     ref.group = "Control+N") +
  ggtitle("HWC")+
  rremove("grid") 
HWC <- ggpar(HWC, tickslab = T,
      ylab = "mg C / kg soil")

ergos <- ggboxplot(data, x = "treatment", y = "ergosterol", fill="treatment2", 
                   palette =c("light blue","mediumpurple1","gold", "lightpink","olivedrab3",
                              "indianred1"), ggtheme = theme_bw())+
  theme(axis.text.x = element_blank(), legend.position = "none")+
  stat_compare_means(method = "anova", label.y = 10, label.x = 5)+   
  stat_compare_means(label = "p.signif",
                     ref.group = "Control+N") +
  ggtitle("ergosterol")+
  rremove("grid") 
ergos <- ggpar(ergos, tickslab = T,
      ylab = "mg ergosterol / kg soil")


fb <- ggboxplot(data, x = "treatment", y = "fungi", fill="treatment2", 
                palette =c("light blue","mediumpurple1","gold", "lightpink","olivedrab3",
                           "indianred1"), ggtheme = theme_bw())+
  theme(axis.text.x = element_blank(), legend.position = "none")+
  stat_compare_means(method = "anova", label.y = 200, label.x = 5)+   
  stat_compare_means(label = "p.signif",
                     ref.group = "Control+N") +
  ggtitle("Fungal biomass")+
  rremove("grid") 
fb <- ggpar(fb, tickslab = T,
      ylab = "mg C / kg soil")

bb <- ggboxplot(data, x = "treatment", y = "bacteria", fill="treatment2", 
                palette =c("light blue","mediumpurple1","gold", "lightpink","olivedrab3",
                           "indianred1"), ggtheme = theme_bw())+
  theme(axis.text.x = element_blank(), legend.position="bottom")+
  stat_compare_means(method = "anova", label.y = 80, label.x = 5)+   
  stat_compare_means(label = "p.signif",
                     ref.group = "Control+N") +
  ggtitle("Bacterial biomass")+
  rremove("grid") 
bb <- ggpar(bb, tickslab = T,
      ylab = "mg C / kg soil")


plot <- ggarrange(PMN, HWC, ergos, fb, bb, 
          ncol = 1, nrow = 5)

ggsave("biological_parameters.all.anova.padj.values_CaNO3control.pdf", plot = plot, width = 7, height = 15)



# Type II ANOVAs (car::Anova) of the biological parameters on treatment,
# soil, year and all their interactions.
# The response columns are named as in the boxplots built from this same
# data frame ("bacteria", "fungi"), so the models refer to columns that are
# known to exist in `data`.
# print() is explicit so the tables are shown when the script is source()d.
my_anova <- aov(bacteria ~ treatment * soil * year, data = data)
print(Anova(my_anova, type = "II"))

my_anova <- aov(fungi ~ treatment * soil * year, data = data)
print(Anova(my_anova, type = "II"))

my_anova <- aov(HWC ~ treatment * soil * year, data = data)
print(Anova(my_anova, type = "II"))



# -------------------------------
#  Soil parameters correlations
# -------------------------------
# (The separator lines must be comments: bare dashes are parsed as unary
# minus operators and turn the following `data <- ...` into an invalid
# assignment.)

data <- read.table("soil_parameters_new.csv", header = TRUE, sep = ",")

# Drop the rows where soil is "Vredepeel" AND year is 2016.
data <- subset(data, subset = !(data$soil == "Vredepeel" & data$year == "2016"))

# Is there a correlation between factors?

library(Hmisc)
library(corrplot)


# NOTE(review): the parameters are selected by column position (25 to 57);
# confirm this range still matches the layout of the input file.
data2 <- data[, 25:57]
data2 <- as.matrix(data2)

# Pairwise Spearman correlations; cor$r holds the coefficients and cor$P
# the p-values used below.
cor <- rcorr(data2, type = "spearman")

col <- colorRampPalette(c("darkorange", "white", "steelblue"))(20)


# For hclust display:

# Label order under hierarchical clustering. Not used by the plot below,
# which re-derives the order through order = "hclust".
ord <- corrMatOrder(cor$r, order = "hclust")


pdf("corr_heatmap_soil_parameters_hclust_NoVredepeel2016_newNames.pdf", width = 6, height = 6)
# Upper triangle only; cells with p > 0.05 are left blank.
corrplot(cor$r, type = "upper", method = "ellipse", p.mat = cor$P,
         sig.level = 0.05, insig = "blank", col = col,
         tl.cex = 0.7, tl.col = "black", order = "hclust")
dev.off()


# Scatter plots of the parameters showing a significant correlation with
# R. solani disease.

data <- read.table("soil_parameters_new_new_names.csv", header = TRUE, sep = ",")

# Drop the rows where soil is "Vredepeel" AND year is 2016.
data <- subset(data, subset = !(data$soil == "Vredepeel" & data$year == "2016"))

# NOTE(review): the parameters are selected by column position (25 to 57);
# confirm this range still matches the layout of the input file.
data2 <- data[, 25:57]


library("ggpubr")

# Scatter plot of one soil parameter (y) against R. solani disease (x) with
# a regression line, its confidence band and the Spearman correlation.
#   df     data frame holding "Rs.dis" and `param`
#   param  name of the column plotted on the y axis; also used as y label
# Returns the ggplot object produced by ggscatter().
disease_scatter <- function(df, param) {
  ggscatter(df, x = "Rs.dis", y = param,
            add = "reg.line", conf.int = TRUE,
            cor.coef = TRUE, cor.method = "spearman",
            ylab = param, xlab = "Rs.disease")
}

HWC <- disease_scatter(data2, "HWC")
PMN <- disease_scatter(data2, "PMN")
Mn.t <- disease_scatter(data2, "Mn.t")
Co.a <- disease_scatter(data2, "Co.a")
Se.a <- disease_scatter(data2, "Se.a")
Ca.a <- disease_scatter(data2, "Ca.a")
Zn.a <- disease_scatter(data2, "Zn.a")
N.a <- disease_scatter(data2, "N.a")
N.t <- disease_scatter(data2, "N.t")
P.a <- disease_scatter(data2, "P.a")
pH <- disease_scatter(data2, "pH")
Cu.a <- disease_scatter(data2, "Cu.a")

# 3 x 4 grid; rremove("x.text") is applied to the last panel (Cu.a) only.
plot <- ggarrange(PMN, HWC, Mn.t, Co.a, Se.a, Ca.a, Zn.a, N.a, N.t, P.a, pH,
                  Cu.a + rremove("x.text"),
                  ncol = 3, nrow = 4)

ggsave("corplots_signif_soil_parameters_noVredepel2016.pdf", plot = plot, width = 8, height = 10)

# For respiration and C/N of the product (Note: no replicates, only one
# measurement).

data <- read.table("soil_parameters_new_new_names.csv", header = TRUE, sep = ",")
# Drop the rows where soil is "Vredepeel" AND year is 2016.
data <- subset(data, subset = !(data$soil == "Vredepeel" & data$year == "2016"))

# Scatter plot of a product property (y) against R. solani disease (x) with
# a regression line, its confidence band and the Spearman correlation.
#   df      data frame holding "Rs.dis" and `column`
#   column  name of the column plotted on the y axis
#   label   axis label describing `column`
# The axis labels follow the plotted variables: disease is on x, the product
# property on y (the labels used to be swapped).
product_scatter <- function(df, column, label) {
  ggscatter(df, x = "Rs.dis", y = column,
            add = "reg.line", conf.int = TRUE,
            cor.coef = TRUE, cor.method = "spearman",
            xlab = "Rs disease", ylab = label)
}

CN <- product_scatter(data, "CN.product", "C/N product")
resp <- product_scatter(data, "resp.product", "Respiration product")

plot <- ggarrange(resp, CN + rremove("x.text"),
                  ncol = 1, nrow = 2)

ggsave("corplots_CN-resp_products.pdf", plot = plot, width = 5, height = 7)

# Same plots using the per-treatment averages.

# The two control treatments are left out of the averaged data.
data <- subset(data, subset = !data$treatment == "Control")
data <- subset(data, subset = !data$treatment == "Control+N")

data2 <- data[, c("treatment", "CN.product", "resp.product", "Rs.dis")]

library("reshape2")
# Long format, then one row per treatment holding the mean of each variable.
data3 <- melt(data2, id = c("treatment"))
res <- dcast(data3, treatment ~ variable, mean)


CN <- product_scatter(res, "CN.product", "C/N product")
resp <- product_scatter(res, "resp.product", "Respiration product")

plot <- ggarrange(resp, CN + rremove("x.text"),
                  ncol = 1, nrow = 2)

ggsave("corplots_CN-resp_products_mean_values.pdf", plot = plot, width = 5, height = 7)



# ------------------------------------------------------------------------
# Correlation between respiration rate and CN (only organic products) and disease
# Important: those parameters were measured only once (1 replicate)
# ------------------------------------------------------------------------
# (The separator lines must be comments: bare dashes are parsed as unary
# minus operators and turn the following `data <- ...` into an invalid
# assignment.)


data <- read.table("soil_parameters_new.csv", header = TRUE, sep = ",")

# Drop the rows where soil is "Vredepeel" AND year is 2016.
data <- subset(data, subset = !(data$soil == "Vredepeel" & data$year == "2016"))

# Is there a correlation between factors?

library(Hmisc)
library(corrplot)


data2 <- data[, c("resp.product", "CN.product", "rs.dis")]
data2 <- as.matrix(data2)

# Pairwise Spearman correlations; cor$r holds the coefficients and cor$P
# the p-values used below.
cor <- rcorr(data2, type = "spearman")

col <- colorRampPalette(c("darkorange", "white", "steelblue"))(20)

pdf("corr_heatmap_resp-CN.product.pdf", width = 6, height = 6)
# Upper triangle as numbers; cells with p > 0.01 are left blank.
corrplot(cor$r, type = "upper", method = "number", p.mat = cor$P,
         sig.level = 0.01, insig = "blank", tl.cex = 0.7, col = col)
dev.off()




# -------------------------------------------------------------------------
# boxplots with soil physico-chemic parameters for type of treatment
# (CaNO3 as control; no Vredepeel 2016 samples)
# -------------------------------------------------------------------------
# (The separator lines must be comments: bare dashes are parsed as unary
# minus operators and turn the following `data <- ...` into an invalid
# assignment.)


data <- read.table("soil_parameters_new_new_names.csv", header = TRUE, sep = ",")

# Drop the rows where soil is "Vredepeel" AND year is 2016.
data <- subset(data, subset = !(data$soil == "Vredepeel" & data$year == "2016"))

# Convert year to a factor.
data$year <- factor(data$year,
                    levels = c(2016, 2017),
                    labels = c("2016", "2017"))


data$treatment2 <- as.character(data$treatment2)


# Boxplot of one soil parameter per treatment type (`treatment2`).
#   df         data frame holding `treatment2` and `param`
#   param      name of the column plotted on the y axis; also the plot title
#   label_y    y position of the global ANOVA p-value label
#   y_lab      y axis label; NULL keeps the default label
#   x_text     theme element used for the x tick labels
#   legend_pos legend position passed to theme()
# Returns the ggplot object.
soil_param_boxplot <- function(df, param, label_y, y_lab = NULL,
                               x_text = element_blank(),
                               legend_pos = "none") {
  p <- ggboxplot(df, x = "treatment2", y = param, fill = "treatment2",
                 palette = c("light blue", "lightpink", "indianred1",
                             "mediumpurple1", "olivedrab3", "gold"),
                 ggtheme = theme_bw()) +
    theme(axis.text.x = x_text, legend.position = legend_pos) +
    # Global ANOVA p-value.
    stat_compare_means(method = "anova", label.y = label_y, label.x = 2) +
    # Significance of each treatment type against the "Control" group.
    stat_compare_means(label = "p.signif",
                       ref.group = "Control") +
    ggtitle(param)
  if (!is.null(y_lab)) {
    p <- p + ylab(y_lab)
  }
  p
}

Cu.a <- soil_param_boxplot(data, "Cu.a", 100, "g/kg")
P.a <- soil_param_boxplot(data, "P.a", 100, "mg/kg")
B.t <- soil_param_boxplot(data, "B.t", 100, "mmol+/kg")
K.t <- soil_param_boxplot(data, "K.t", 10, "mmol+/kg")
K.a <- soil_param_boxplot(data, "K.a", 100, "mg/kg")
Na.t <- soil_param_boxplot(data, "Na.t", 60, "mmol+/kg")
P.t <- soil_param_boxplot(data, "P.t", 100, "mg P2O5/l")
N.t <- soil_param_boxplot(data, "N.t", 100) # No differences
N.a <- soil_param_boxplot(data, "N.a", 100, "mg/kg")
Mo.a <- soil_param_boxplot(data, "Mo.a", 10, "g/kg")
Ca.t <- soil_param_boxplot(data, "Ca.t", 100, "mmol+/kg")
C.N <- soil_param_boxplot(data, "C.N", 40) # No differences
Mg.t <- soil_param_boxplot(data, "Mg.t", 300, "mmol+/kg")
Ca.a <- soil_param_boxplot(data, "Ca.a", 100, "mmol+/kg")
Co.a <- soil_param_boxplot(data, "Co.a", 15, "g/kg")
Mn.t <- soil_param_boxplot(data, "Mn.t", 100, "mmol+/kg")
silica <- soil_param_boxplot(data, "silica", 20000, "g/kg")
# The last panel keeps rotated x tick labels and a legend on the right.
Se.a <- soil_param_boxplot(data, "Se.a", 10, "g/kg",
                           x_text = element_text(angle = 45, hjust = 1),
                           legend_pos = "right")

# Parameters examined but left out of the figure (kept for reference):
# pH    <- soil_param_boxplot(data, "pH", 10)          # No differences in any treatment
# CaCO3 <- soil_param_boxplot(data, "CaCO3", 100)      # No differences in any treatment (was compared against "Control+N")
# S.t   <- soil_param_boxplot(data, "S.t", 100, "mg/kg")
# S.a   <- soil_param_boxplot(data, "S.a", 100)        # No differences
# sand  <- soil_param_boxplot(data, "sand", 100)       # No differences (was plotted per "treatment", against "Control+N")
# silt  <- soil_param_boxplot(data, "silt", 100)       # No differences
# OM    <- soil_param_boxplot(data, "OM", 100)         # No differences
# CEC   <- soil_param_boxplot(data, "CEC", 100)        # Almost no differences
# Zn.a  <- soil_param_boxplot(data, "Zn.a", 100)       # No differences


# 5 x 4 grid with a shared legend at the bottom. N.a used to be built twice
# and listed twice here, which drew the same panel two times; it now appears
# once.
plot <- ggarrange(B.t, Ca.a, Ca.t, Co.a, Cu.a, K.a, K.t, Mg.t, Mn.t, Mo.a, N.a,
                  Na.t, P.a, P.t, N.t, C.N, silica, Se.a + rremove("x.text"),
                  ncol = 5, nrow = 4, common.legend = TRUE, legend = "bottom")

ggsave("boxplots_soil_physico-chemic_parameters_with_differences_per_type_treatment_NoVredepeel2016.pdf", plot = plot, width = 15, height = 15)




# Linear model of R. solani disease on the remaining soil parameters,
# followed by an ANOVA and Tukey's HSD.

data <- read.table("soil_parameters_new_names.csv", header = TRUE, sep = ",")

# Drop the rows where soil is "Vredepeel" AND year is 2016.
data <- subset(data, subset = !(data$soil == "Vredepeel" & data$year == "2016"))

# Convert year to a factor.
data$year <- factor(data$year,
                    levels = c(2016, 2017),
                    labels = c("2016", "2017"))


data$treatment2 <- as.character(data$treatment2)
data$treatment <- as.character(data$treatment)

# NOTE(review): columns are selected and dropped by position; confirm the
# indices still match the layout of the input file.
data2 <- data[, 7:57]
data2 <- data2[-c(2:7, 9:18)]
data3 <- data2[-c(2)]
data4 <- data2[-c(1)]


# The response is named by column so that `.` expands to every OTHER column
# of data4. With `data4$Rs.dis ~ .` the response is not recognised as a
# column of `data`, so Rs.dis itself is added to the predictors.
fit <- lm(Rs.dis ~ ., data = data4)
results <- aov(fit)
# NOTE(review): TukeyHSD() only compares the levels of factor terms; confirm
# that data4 holds the grouping column(s) intended for the comparison.
anova_results <- TukeyHSD(results)




######################################################################
# 				NETWORK
######################################################################





# Packages used by the network analysis.
library(igraph)

library(qgraph)

library(vegan)

library(MCL)



# SpiecEasi is installed from GitHub. Install it (and devtools, which is
# needed for the installation) only when missing, so that running the script
# does not reinstall packages or require network access every time.
if (!requireNamespace("SpiecEasi", quietly = TRUE)) {
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  devtools::install_github("zdk123/SpiecEasi")
}

library(SpiecEasi)



# Data input for the network analysis: abundance tables, sample metadata and
# taxonomy for bacteria (16S) and fungi (ITS).


# --- Bacteria ---
abund.b <- read.table("table-BI_16S_all-runs_grouped-mcf-f10-noCMU-biom.tsv",
                      header = TRUE, sep = "\t", row.names = 1)
soil.b <- read.table("metadata_16S_all_grouped.tsv", header = TRUE, sep = "\t")
# Taxonomy file, restricted to the features present in the abundance table.
tax.b <- read.table("taxonomy.tsv", header = TRUE, sep = "\t", row.names = 1)
tax2.b <- tax.b[rownames(tax.b) %in% rownames(abund.b), ]
# Rename the features with the ASV labels of the taxonomy table.
rownames(abund.b) <- tax2.b$ASV[match(row.names(abund.b), row.names(tax2.b))]
# Transpose so that the rows are samples, then sort both tables by sample.
abund.b <- as.data.frame(t(abund.b))
soil.b <- soil.b[order(soil.b$sample.id), ]
abund.b <- abund.b[order(row.names(abund.b)), ]
# Metadata is ordered the same way as soil and abund.
metadata.b <- read.table("metadata_soil.txt", header = TRUE)
metadata.b <- metadata.b[order(metadata.b$sample.id), ]
tax3.b <- read.table("taxonomy_table.csv", header = TRUE, sep = ",",
                     na.strings = c("", "NA"), row.names = 1)


# --- Fungi ---
abund.f <- read.table("table_ITS-dada2-14-q2ca2_tax-u8-dyn_ITS2-rs_kFungi_sfmin53.tsv",
                      header = TRUE, sep = "\t", row.names = 1,
                      check.names = FALSE)
soil.f <- read.table("metadata_ITS-28S_2016-2017_modif.csv", header = TRUE, sep = "\t")
# Taxonomy file.
tax.f <- read.table("tax-unite-ver8-dyn_ITS2-ref-seqs_ITS-dada2-14-q2ca2_kFungi_sfmin53.tsv",
                    header = TRUE, sep = "\t", row.names = 1)
# Rename the features with the ASV labels of the taxonomy table.
rownames(abund.f) <- tax.f$ASV[match(row.names(abund.f), row.names(tax.f))]
# Transpose so that the rows are samples, then sort both tables by sample.
abund.f <- as.data.frame(t(abund.f))
soil.f <- soil.f[order(soil.f$sample.id), ]
abund.f <- abund.f[order(row.names(abund.f)), ]
# Metadata is ordered the same way as soil and abund.
metadata.f <- read.table("metadata2_fungi.csv", header = TRUE, sep = ",")
metadata.f <- metadata.f[order(metadata.f$sample.id), ]
tax3.f <- read.table("taxonomy_table_fungi.csv", header = TRUE, sep = ",",
                     na.strings = c("", "undef"), row.names = 1)






# Remove some samples from both data sets so they hold the same number of
# samples: 208-ITS28S.2016.B12.S24, 16S2016.G04.S40, 16S2017.H07.S43

# Fungi: the sample is removed by row position.
# NOTE(review): presumably row 24 is 208-ITS28S.2016.B12.S24 after sorting;
# verify against the row names.
abund.f <- abund.f[-24, ]
# Bacteria: samples are removed by name (the abundance table carries an "X"
# prefix on these sample names, the metadata does not).
drop.abund.b <- rownames(abund.b) == "X16S2016.G04.S40" |
  rownames(abund.b) == "X16S2017.H07.S43"
abund.b <- abund.b[!drop.abund.b, ]


metadata.f <- metadata.f[-24, ]
drop.meta.b <- metadata.b$sample.id == "16S2016.G04.S40" |
  metadata.b$sample.id == "16S2017.H07.S43"
metadata.b <- metadata.b[!drop.meta.b, ]


# Keep only the top ASVs: total count above 20000 (bacteria) or 11000 (fungi).
abund.b <- abund.b[, colSums(abund.b) > 20000]
abund.f <- abund.f[, colSums(abund.f) > 11000]


# Join fungal and bacterial abundances column-wise in one data frame.
# NOTE(review): the rows are combined by position, so this assumes both
# tables list the same samples in the same order.
abund <- cbind(abund.f, abund.b)
# Relative abundance: each row divided by its total.
abund.rel <- abund / rowSums(abund)

# Join the taxonomy tables and keep the taxa present in the abundance table.

tax <- rbind(tax3.b, tax3.f)
tax <- tax[rownames(tax) %in% colnames(abund.rel), ]

#################
# Dissimilarity network
#######

# Bray-Curtis dissimilarity between taxa (the table is transposed so that
# taxa are the rows).
distances <- vegdist(t(abund.rel), method = "bray")

# Convert distance object to a matrix
diss.mat <- as.matrix(distances)
# Two taxa are connected when their dissimilarity is at most the cutoff.
diss.cutoff <- 0.6
diss.adj <- ifelse(diss.mat <= diss.cutoff, 1, 0)
# Construct microbiome network from adjacency matrix.
# graph_from_adjacency_matrix() is the current name of the deprecated
# graph.adjacency(); diag = FALSE ignores the diagonal (no self loops).
diss.net <- graph_from_adjacency_matrix(diss.adj,
                                        mode = "undirected",
                                        diag = FALSE)


######################
# SparCC network
####################

# SparCC correlations are computed on the count table (not the relative
# abundances).
sparcc.matrix <- sparcc(abund)
# Two taxa are connected when the absolute correlation reaches the cutoff.
sparcc.cutoff <- 0.3
sparcc.adj <- ifelse(abs(sparcc.matrix$Cor) >= sparcc.cutoff, 1, 0)
# Add OTU names to rows and columns
rownames(sparcc.adj) <- colnames(abund)
colnames(sparcc.adj) <- colnames(abund)
# Build network from adjacency
sparcc.net <- graph_from_adjacency_matrix(sparcc.adj,
                                          mode = "undirected",
                                          diag = FALSE)
# Hub detection

# Use sparcc.net for the rest of the method
net <- sparcc.net
# Node centrality measures.
net.cn <- closeness(net)
net.bn <- betweenness(net)
net.pr <- page_rank(net)$vector
net.hs <- hub_score(net)$vector

# Sort the species based on hubbiness score
net.hs.sort <- sort(net.hs, decreasing = TRUE)
# Choose the top 5 keystone species
net.hs.top5 <- head(net.hs.sort, n = 5)


# Cluster detection

# Get clusters. cluster_walktrap() and cluster_louvain() are the current
# names of the deprecated walktrap.community() and multilevel.community().
wt <- cluster_walktrap(net)
ml <- cluster_louvain(net)
# Get membership of walktrap clusters
membership(wt)
# Get clusters using MCL method. A dense base matrix is requested because
# as_adjacency_matrix() returns a sparse Matrix object by default.
# NOTE(review): presumably mcl() needs a base matrix; the dense form is
# accepted either way.
adj <- as_adjacency_matrix(net, sparse = FALSE)
mc <- mcl(adj, addLoops = TRUE)

# Compare clusters detected by different methods
compare(membership(wt), membership(ml))
compare(membership(wt), mc$Cluster)
# Create customized membership for comparison: every node is assigned at
# random to one of five clusters (no seed is set, so it changes per run).
expected.cls <- sample(1:5, vcount(net), replace = TRUE) %>%
  as_membership
compare(expected.cls, membership(wt))

# Plot clusters as dendrogram
pdf(file = "dendrogram_clusters.pdf")
plot_dendrogram(wt, cex = 0.2)
dev.off()

# Calculate modularity
modularity(net, membership(wt))


# Network features

nodes <- V(net)
edges <- V(net)
node.names <- V(net)$name
num.nodes <- vcount(net)
num.edges <- ecount(net)

clustering_coeff <- transitivity(net, type = "global")

#The following code snippet calculates and prints the average 
# nearest neighbor degree for all the nodes in the network (hence, vids 1/4 V(net)).
net.knn <- knn(net, vids = V(net))
net.knn$knn

#Remove isolated nodes

clean.net <- delete.vertices(net, which(degree(net, mode = "all")
                                        == 0))

# Network components
net.comps <- components(net)

# Print components membership
net.comps$membership
# Print components sizes
net.comps$csize
# Print number of components
net.comps$no

# Largest component
largest.comp <- V(net)[which.max(net.comps$csize) == net.comps$membership]
# Second component
second.comp <- V(net)[net.comps$membership == 2]

# Largest component subnetwork
largest.subnet <- induced_subgraph(net, largest.comp)

# Degrees
deg <- degree(net, mode = "all")
# Degree distribution
deg.dist <- degree_distribution(net, mode = "all", cumulative = T)
# Plot degree distribution
plot(deg.dist, xlab = "Nodes degree", ylab = "Probability")
lines(deg.dist)
# qgraph method
centralityPlot(net)


# Scalefreeness: Fit a power_law to the network
deg <- degree(net, mode = "in")
pl <- fit_power_law(deg, xmin = 10)
pl$KS.p

# Network similarity: pairwise Jaccard similarity between all nodes
node.similarity <- similarity(net, vids = V(net), mode = "all",
                              method = "jaccard")

# Find articulation points.
# articulation_points() is the current name of the deprecated
# articulation.points(). `AP` is later passed to plot.net.cls().
AP <- articulation_points(net)


##################
# Network visualization
###################

# Quick plots on the active graphics device:
# first the bare network, then the network with the walktrap communities.
plot(x = net)
plot(x = wt, y = net)



# Function 3: Plot network with clusters and node size scaled to hubbiness
#
# Writes "<outfile>.jpg" showing `net` with nodes coloured by cluster, sized
# by `scores`, labelled with numerical IDs and with a halo around every
# articulation point. A legend maps the numerical IDs back to node names.
#
# Args:
#   net:     igraph graph whose vertices carry a `name` attribute.
#   scores:  numeric vector with one score per node; node size is
#            scores * 5 + 4.
#   cls:     communities object describing the clusters of `net`.
#   AP:      named vertex sequence of articulation points (may be empty).
#   outfile: output file name without extension; ".jpg" is appended.
#   title:   main title of the plot.
# Returns:
#   The value of dev.off() after the image has been written.
plot.net.cls <- function(net, scores, cls, AP, outfile, title) {
  # Get size of clusters to find isolated nodes.
  cls_sizes <- vapply(groups(cls), length, integer(1))
  # Randomly choosing node colors. Users can provide their own vector of colors.
  colors <- sample(colours(), length(cls))
  # Nodes in clusters will be color coded. Isolated nodes (clusters of
  # size 1) will be white. Vectorised over the membership vector.
  memb <- as.integer(membership(cls))
  V(net)$color <- unname(ifelse(cls_sizes[memb] > 1, colors[memb], "white"))
  # Convert node label from names to numerical IDs; the original names are
  # kept for the legend and for locating the articulation points.
  node.names <- V(net)$name
  col_ids <- seq_along(node.names)
  V(net)$name <- col_ids
  # To draw a halo around articulation points: one mark group per point.
  # Iterating over the names directly (instead of 1:length(AP)) keeps this
  # valid when the network has no articulation points.
  marks <- lapply(names(AP), function(ap) which(node.names == ap))
  # Define output image file.
  outfile <- paste(outfile, "jpg", sep = ".")
  # Image properties.
  jpeg(outfile, width = 4800, height = 9200, res = 300, quality = 100)
  # Make sure the device is closed even if plotting fails half-way.
  finished <- FALSE
  on.exit(if (!finished) dev.off(), add = TRUE)
  par(oma = c(4, 1, 1, 1))
  # Customized layout to avoid nodes overlapping. The layout function needs
  # the edge list as numeric vertex ids, which names = FALSE returns directly.
  e <- as_edgelist(net, names = FALSE)
  l <- qgraph.layout.fruchtermanreingold(e, vcount = vcount(net),
                                         area = 8 * (vcount(net)^2),
                                         repulse.rad = (vcount(net)^3.1))
  # Main plot function.
  plot(net, vertex.size = (scores * 5) + 4, vertex.label.cex = 0.9,
       vertex.label.color = "black",
       mark.border = "black",
       mark.groups = marks,
       mark.col = "white",
       mark.expand = 10,
       mark.shape = 1,
       layout = l)
  title(title, cex.main = 4)
  # Plot legend containing OTU names ("<id>) <name>").
  labels <- paste(col_ids, node.names, sep = ") ")
  legend("bottom", legend = labels, xpd = TRUE, ncol = 5, cex = 1.2)
  finished <- TRUE
  dev.off()
}
# Run Function 3 (it must be defined first): plots `net` with the walktrap
# clusters, hub scores and articulation points into "network2.jpg".
plot.net.cls(net = net, scores = net.hs, cls = wt, AP = AP,
             title = "My Network", outfile = "network2")








#############################################################

library(RColorBrewer)




# Function 3 (phylum-labelled variant): Plot network with clusters and node
# size scaled to hubbiness
#
# Writes "<outfile>.jpg" showing `net` with nodes coloured by cluster (Set3
# palette), sized by `scores`, labelled with their phylum and with a halo
# around every articulation point. This definition replaces the earlier
# plot.net.cls() when the script is run top to bottom.
#
# Args:
#   net:     igraph graph whose vertices carry a `name` attribute.
#   scores:  numeric vector with one score per node; node size is
#            scores * 5 + 4.
#   cls:     communities object describing the clusters of `net`.
#   AP:      named vertex sequence of articulation points (may be empty).
#   outfile: output file name without extension; ".jpg" is appended.
#   title:   main title of the plot.
# Uses the global data frame `tax` (row names = node names, column `Phylum`).
# NOTE(review): assumes the node names of `net` are row names of `tax`;
# nodes without a match keep their own name as label - confirm.
# Returns:
#   The value of dev.off() after the image has been written.
plot.net.cls <- function(net, scores, cls, AP, outfile, title) {
  if (is.null(tax$Phylum)) {
    stop("`tax` must contain a `Phylum` column", call. = FALSE)
  }
  # Get size of clusters to find isolated nodes.
  cls_sizes <- vapply(groups(cls), length, integer(1))
  # We provide vector of colors. Set3 offers between 3 and 12 colours, so the
  # request is clamped to that range and the palette is interpolated when
  # there are more clusters than colours (otherwise they would get NA).
  n_cls <- length(cls)
  base_pal <- brewer.pal(min(max(n_cls, 3L), 12L), "Set3")
  colors <- if (n_cls > length(base_pal)) {
    colorRampPalette(base_pal)(n_cls)
  } else {
    base_pal[seq_len(n_cls)]
  }
  # Nodes in clusters will be color coded. Isolated nodes (clusters of
  # size 1) will be white. Vectorised over the membership vector.
  memb <- as.integer(membership(cls))
  V(net)$color <- unname(ifelse(cls_sizes[memb] > 1, colors[memb], "white"))
  # Node labels: phylum of each node, looked up by node name. match() keeps
  # the labels aligned with the node order; the previous logical index over
  # node names selected unrelated rows of `tax`.
  node.names <- V(net)$name
  phylum <- as.character(tax$Phylum[match(node.names, rownames(tax))])
  col_ids <- ifelse(is.na(phylum), node.names, phylum)
  # To draw a halo around articulation points: one mark group per point.
  # Iterating over the names directly (instead of 1:length(AP)) keeps this
  # valid when the network has no articulation points.
  marks <- lapply(names(AP), function(ap) which(node.names == ap))
  # Define output image file.
  outfile <- paste(outfile, "jpg", sep = ".")
  # Image properties.
  jpeg(outfile, width = 4800, height = 9200, res = 300, quality = 100)
  # Make sure the device is closed even if plotting fails half-way.
  finished <- FALSE
  on.exit(if (!finished) dev.off(), add = TRUE)
  par(oma = c(4, 1, 1, 1))
  # Customized layout to avoid nodes overlapping. The layout function needs
  # numeric vertex ids; names = FALSE returns them directly. (Coercing an
  # edge list of phylum names to numeric produced only NA.)
  e <- as_edgelist(net, names = FALSE)
  l <- qgraph.layout.fruchtermanreingold(e, vcount = vcount(net),
                                         area = 8 * (vcount(net)^2),
                                         repulse.rad = (vcount(net)^3.1))
  # Main plot function. Labels are passed explicitly so the vertex names of
  # the graph stay untouched.
  plot(net, vertex.size = (scores * 5) + 4, vertex.label.cex = 0.9,
       vertex.label = col_ids,
       vertex.label.color = "black",
       mark.border = "black",
       mark.groups = marks,
       mark.col = "white",
       mark.expand = 10,
       mark.shape = 1,
       layout = l)
  title(title, cex.main = 4)
  # Plot legend listing every node as "<index>) <phylum>".
  labels <- paste(seq_along(col_ids), col_ids, sep = ") ")
  legend("bottom", legend = labels, xpd = TRUE, ncol = 5, cex = 1.2)
  finished <- TRUE
  dev.off()
}
# Run the redefined Function 3 (it must be defined first). The output name is
# the same as in the earlier call, so "network2.jpg" is written again.
plot.net.cls(net = net, scores = net.hs, cls = wt, AP = AP,
             title = "My Network", outfile = "network2")





















































