require(tidyverse)
require(lubridate)
# Historic benthos data set, read from the delivered csv export.
wsold <- read_csv(
  file.path("data", "delivered", "rwsbenthos_zwdelta_tijdelijk.csv")
)

# `monsterdat` is text of the form day-month-year where the day may lack its
# leading zero and the month is a Dutch three-letter abbreviation. Both are
# normalised so the value can be parsed with the numeric format "%d-%m-%y".
dutch_months <- c(
  jan = "01", feb = "02", mrt = "03", apr = "04", mei = "05", jun = "06",
  jul = "07", aug = "08", sep = "09", okt = "10", nov = "11", dec = "12"
)
sample_date <- as.character(wsold$monsterdat)

# Pad single-digit days: a "-" at position 2 means the day has one digit.
short_day <- !is.na(sample_date) & substr(sample_date, 2, 2) == "-"
sample_date[short_day] <- paste0("0", sample_date[short_day])

# Replace the month abbreviation at positions 4-6 by its two-digit number.
# Values whose month is not in the lookup are left untouched.
month_abbr <- substr(sample_date, 4, 6)
has_abbr <- month_abbr %in% names(dutch_months)
sample_date[has_abbr] <- paste0(
  substr(sample_date[has_abbr], 1, 3),
  dutch_months[month_abbr[has_abbr]],
  substring(sample_date[has_abbr], 7)
)

# Keep the repaired text as a plain character column (not a list column) and
# store the parsed date next to it.
wsold$monsterdat <- sample_date
wsold$datum <- parse_date(sample_date, format = "%d-%m-%y")

# Copy `soortnaam_` into `soortnaam` and keep only the columns used in the
# combined data set, in this fixed order.
wsold <- wsold %>%
  mutate(soortnaam = soortnaam_) %>%
  select(all_of(c(
    "monster_id", "rd_x", "rd_y", "gebied", "zout", "substraat", "positie",
    "dynamiek", "diepte_hoo", "substraat_", "datum", "jaar", "seizoen",
    "veldproced", "bemonst_op", "diepte_nap", "soortnaam", "ned_naam_n",
    "groep", "niveau", "phylum", "classis", "ordo", "familia", "herkomst_n",
    "aanwezigheid", "aantal", "asvdrg_mg", "dichtheid_n_m2", "biomassa_mg_m2"
  )))

##################### 2018 ##########################################################
# no substrate information available
# 2018 sample metadata: rows whose LOC_CODE starts with "WS" and SUB_NR == 1.
# The free-text location name is recoded into short codes from which zout,
# dynamiek and diepte_hoo are derived; column names are lower-cased at the end.
mi18 <- read_csv(file.path("data", "delivered", "minfo2018.csv")) %>%
  filter(substr(LOC_CODE, 1, 2) == "WS", SUB_NR == 1) %>%
  mutate(date_mi = parse_date(DATE_SMP, format = "%d-%m-%Y")) %>%
  select(
    EXT_REF, date_mi, Loc_name, COORD_X, COORD_Y, MONSDTE, AMT_CALC, UNT_CALC
  ) %>%
  mutate(
    Loc_name = sub("Westerschelde ", "", Loc_name),
    Loc_name = sub("Zout", "z ", Loc_name),
    Loc_name = sub("Brak", "b ", Loc_name),
    Loc_name = sub("laag dynamisch", " ld ", Loc_name),
    Loc_name = sub("hoog dynamisch", " hd ", Loc_name),
    Loc_name = sub("diep", " dd ", Loc_name),
    # The specific "... litoraal" patterns must be replaced before the bare
    # "litoraal", otherwise the bare pattern would match inside them.
    Loc_name = sub("laag litoraal", " ll ", Loc_name),
    Loc_name = sub("midden litoraal", " ml ", Loc_name),
    Loc_name = sub("hoog litoraal", " hl ", Loc_name),
    Loc_name = sub("litoraal", " lt ", Loc_name),
    Loc_name = sub("locatie ", "", Loc_name),
    Loc_name = gsub(",", "", Loc_name),
    # Two passes collapse the runs of spaces left by the replacements above.
    Loc_name = gsub("  ", " ", Loc_name),
    Loc_name = gsub("  ", " ", Loc_name)
  ) %>%
  mutate(
    # After the clean-up the codes sit at fixed positions in the name.
    salt_code = substr(Loc_name, 1, 1),
    dyn_code = substr(Loc_name, 3, 4),
    depth_code = substr(Loc_name, 6, 7),
    zout = case_when(
      salt_code == "z" ~ "zout",
      salt_code == "b" ~ "brak",
      TRUE ~ NA_character_
    ),
    dynamiek = case_when(
      dyn_code == "ld" ~ "laag dynamisch",
      dyn_code == "hd" ~ "hoog dynamisch",
      TRUE ~ NA_character_
    ),
    diepte_hoo = case_when(
      depth_code == "dd" ~ "diep",
      depth_code == "ll" ~ "laag litoraal",
      depth_code == "ml" ~ "midden litoraal",
      depth_code == "hl" ~ "hoog litoraal",
      depth_code == "lt" ~ "litoraal",
      TRUE ~ NA_character_
    )
  ) %>%
  select(-Loc_name, -salt_code, -dyn_code, -depth_code) %>%
  mutate(
    # "nvt" becomes NA and a decimal comma becomes a decimal point before the
    # conversion to numeric.
    MONSDTE = as.numeric(
      sub(",", ".", ifelse(MONSDTE == "nvt", NA, MONSDTE))
    )
  ) %>%
  # No substrate information for this year: placeholder column of NA.
  mutate(substraat_ = NA) %>%
  setNames(tolower(names(.)))

# 2018 density/biomass records: rows whose loc_code starts with "WS", with the
# sampling date parsed and the sample metadata (mi18) attached by ext_ref.
# All column names end up lower-case.
db18 <- file.path("data", "delivered", "densbiom2018.csv") %>%
  read_csv() %>%
  filter(substr(loc_code, 1, 2) == "WS") %>%
  mutate(date_db = parse_date(date_smp, format = "%d-%m-%Y")) %>%
  left_join(mi18, by = "ext_ref") %>%
  setNames(tolower(names(.)))


# Reshape the 2018 records to the column names and order used for the
# combined data set.
db18a <- db18 %>%
  # Sample identity, position and date.
  mutate(
    monster_id = ext_ref,
    rd_x = coord_x,
    rd_y = coord_y,
    gebied = "Westerschelde",
    substraat = "Zacht substraat",
    positie = ifelse(diepte_hoo == "diep", "sublitoraal", "litoraal"),
    datum = date_db,
    jaar = year(date_db),
    # Months before August count as "Voorjaar", the rest as "Najaar".
    seizoen = ifelse(month(date_db) < 8, "Voorjaar", "Najaar")
  ) %>%
  # Sampling method, sampled amount and depth. amt_calc.y is the amt_calc
  # column that came in with the joined sample metadata.
  mutate(
    veldproced = smp_app,
    bemonst_op = amt_calc.y,
    diepte_nap = monsdte
  ) %>%
  # Species observations. amt_calc.x is the amt_calc column of the
  # density/biomass file itself.
  mutate(
    soortnaam = par_name,
    aanwezigheid = "aanwezig",
    aantal = amt_calc.x,
    # Non-positive ash-free dry weights are stored as missing.
    asvdrg_mg = ifelse(afdw_calc > 0, afdw_calc, NA),
    dichtheid_n_m2 = aantal / bemonst_op,
    biomassa_mg_m2 = asvdrg_mg / bemonst_op
  ) %>%
  select(all_of(c(
    "monster_id", "rd_x", "rd_y", "gebied", "zout", "substraat", "positie",
    "dynamiek", "diepte_hoo", "substraat_", "datum", "jaar", "seizoen",
    "veldproced", "bemonst_op", "diepte_nap", "soortnaam", "aanwezigheid",
    "aantal", "asvdrg_mg", "dichtheid_n_m2", "biomassa_mg_m2"
  )))

##################### 2019 ##########################################################
# no substrate information available
# 2019 sample metadata: rows whose LOC_CODE starts with "WS" and SUB_NR == 1.
# In this file the location description and sample depth are taken from
# LOC_OMSCHRIJVING and Monsterdiepte. The location name is recoded into short
# codes from which zout, dynamiek and diepte_hoo are derived; column names are
# lower-cased at the end.
mi19 <- read_csv(file.path("data", "delivered", "minfo2019.csv")) %>%
  filter(substr(LOC_CODE, 1, 2) == "WS", SUB_NR == 1) %>%
  mutate(
    date_mi = parse_date(DATE_SMP, format = "%d-%m-%Y"),
    Loc_name = LOC_OMSCHRIJVING,
    MONSDTE = Monsterdiepte
  ) %>%
  select(
    EXT_REF, date_mi, Loc_name, COORD_X, COORD_Y, MONSDTE, AMT_CALC, UNT_CALC
  ) %>%
  mutate(
    Loc_name = sub("Westerschelde ", "", Loc_name),
    Loc_name = sub("Zout", "z ", Loc_name),
    Loc_name = sub("Brak", "b ", Loc_name),
    Loc_name = sub("laag dynamisch", " ld ", Loc_name),
    Loc_name = sub("hoog dynamisch", " hd ", Loc_name),
    Loc_name = sub("diep", " dd ", Loc_name),
    # The specific "... litoraal" patterns must be replaced before the bare
    # "litoraal", otherwise the bare pattern would match inside them.
    Loc_name = sub("laag litoraal", " ll ", Loc_name),
    Loc_name = sub("midden litoraal", " ml ", Loc_name),
    Loc_name = sub("hoog litoraal", " hl ", Loc_name),
    Loc_name = sub("litoraal", " lt ", Loc_name),
    Loc_name = sub("locatie ", "", Loc_name),
    Loc_name = gsub(",", "", Loc_name),
    # Two passes collapse the runs of spaces left by the replacements above.
    Loc_name = gsub("  ", " ", Loc_name),
    Loc_name = gsub("  ", " ", Loc_name)
  ) %>%
  mutate(
    # After the clean-up the codes sit at fixed positions in the name.
    salt_code = substr(Loc_name, 1, 1),
    dyn_code = substr(Loc_name, 3, 4),
    depth_code = substr(Loc_name, 6, 7),
    zout = case_when(
      salt_code == "z" ~ "zout",
      salt_code == "b" ~ "brak",
      TRUE ~ NA_character_
    ),
    dynamiek = case_when(
      dyn_code == "ld" ~ "laag dynamisch",
      dyn_code == "hd" ~ "hoog dynamisch",
      TRUE ~ NA_character_
    ),
    diepte_hoo = case_when(
      depth_code == "dd" ~ "diep",
      depth_code == "ll" ~ "laag litoraal",
      depth_code == "ml" ~ "midden litoraal",
      depth_code == "hl" ~ "hoog litoraal",
      depth_code == "lt" ~ "litoraal",
      TRUE ~ NA_character_
    )
  ) %>%
  select(-Loc_name, -salt_code, -dyn_code, -depth_code) %>%
  mutate(
    # "nvt" becomes NA and a decimal comma becomes a decimal point before the
    # conversion to numeric.
    MONSDTE = as.numeric(
      sub(",", ".", ifelse(MONSDTE == "nvt", NA, MONSDTE))
    )
  ) %>%
  # No substrate information for this year: placeholder column of NA.
  mutate(substraat_ = NA) %>%
  setNames(tolower(names(.)))

# 2019 density/biomass records: rows whose loc_code starts with "WS", with the
# sampling date parsed and the sample metadata (mi19) attached by ext_ref.
# All column names end up lower-case.
db19 <- file.path("data", "delivered", "densbiom2019.csv") %>%
  read_csv() %>%
  filter(substr(loc_code, 1, 2) == "WS") %>%
  mutate(date_db = parse_date(date_smp, format = "%d-%m-%Y")) %>%
  left_join(mi19, by = "ext_ref") %>%
  setNames(tolower(names(.)))


# Reshape the 2019 records to the column names and order used for the
# combined data set.
db19a <- db19 %>%
  # Sample identity, position and date.
  mutate(
    monster_id = ext_ref,
    rd_x = coord_x,
    rd_y = coord_y,
    gebied = "Westerschelde",
    substraat = "Zacht substraat",
    positie = ifelse(diepte_hoo == "diep", "sublitoraal", "litoraal"),
    datum = date_db,
    jaar = year(date_db),
    # Months before August count as "Voorjaar", the rest as "Najaar".
    seizoen = ifelse(month(date_db) < 8, "Voorjaar", "Najaar")
  ) %>%
  # Sampling method, sampled amount and depth. amt_calc.y is the amt_calc
  # column that came in with the joined sample metadata.
  mutate(
    veldproced = smp_app,
    bemonst_op = amt_calc.y,
    diepte_nap = monsdte
  ) %>%
  # Species observations. amt_calc.x is the amt_calc column of the
  # density/biomass file itself.
  mutate(
    soortnaam = par_name,
    aanwezigheid = "aanwezig",
    aantal = amt_calc.x,
    # Non-positive ash-free dry weights are stored as missing.
    asvdrg_mg = ifelse(afdw_calc > 0, afdw_calc, NA),
    dichtheid_n_m2 = aantal / bemonst_op,
    biomassa_mg_m2 = asvdrg_mg / bemonst_op
  ) %>%
  select(all_of(c(
    "monster_id", "rd_x", "rd_y", "gebied", "zout", "substraat", "positie",
    "dynamiek", "diepte_hoo", "substraat_", "datum", "jaar", "seizoen",
    "veldproced", "bemonst_op", "diepte_nap", "soortnaam", "aanwezigheid",
    "aantal", "asvdrg_mg", "dichtheid_n_m2", "biomassa_mg_m2"
  )))

##################### 2020 ###########################################################
# 2020 substrate table: rows whose LOC_CODE starts with "WS", without the
# redox_dep column.
su20 <- file.path("data", "delivered", "substr20.csv") %>%
  read_csv() %>%
  filter(substr(LOC_CODE, 1, 2) == "WS") %>%
  select(-redox_dep)


# 2020 sample metadata: rows whose LOC_CODE starts with "WS". Dates in this
# file use "/" as separator. The location name is recoded into short codes
# from which zout, dynamiek and diepte_hoo are derived, after which the
# substrate table (su20) is attached by EXT_REF.
mi20 <- read_csv(file.path("data", "delivered", "minfo2020.csv")) %>%
  filter(substr(LOC_CODE, 1, 2) == "WS") %>%
  mutate(date_mi = parse_date(DATE_SMP, format = "%d/%m/%Y")) %>%
  select(
    EXT_REF, date_mi, Loc_name, COORD_X, COORD_Y, MONSDTE, AMT_CALC, UNT_CALC
  ) %>%
  mutate(
    Loc_name = sub("Westerschelde ", "", Loc_name),
    Loc_name = sub("Zout", "z", Loc_name),
    Loc_name = sub("Brak", "b", Loc_name),
    Loc_name = sub("laag dynamisch", "ld", Loc_name),
    Loc_name = sub("hoog dynamisch", "hd", Loc_name),
    Loc_name = sub("diep", "dd", Loc_name),
    # The specific "... litoraal" patterns must be replaced before the bare
    # "litoraal", otherwise the bare pattern would match inside them.
    Loc_name = sub("laag litoraal", "ll", Loc_name),
    Loc_name = sub("midden litoraal", "ml", Loc_name),
    Loc_name = sub("hoog litoraal", "hl", Loc_name),
    Loc_name = sub("litoraal", "lt", Loc_name),
    Loc_name = sub("locatie ", "", Loc_name),
    # Turn the " - " separators into single spaces.
    Loc_name = gsub(" - ", " ", Loc_name)
  ) %>%
  mutate(
    # After the clean-up the codes sit at fixed positions in the name.
    salt_code = substr(Loc_name, 1, 1),
    dyn_code = substr(Loc_name, 3, 4),
    depth_code = substr(Loc_name, 6, 7),
    zout = case_when(
      salt_code == "z" ~ "zout",
      salt_code == "b" ~ "brak",
      TRUE ~ NA_character_
    ),
    dynamiek = case_when(
      dyn_code == "ld" ~ "laag dynamisch",
      dyn_code == "hd" ~ "hoog dynamisch",
      TRUE ~ NA_character_
    ),
    diepte_hoo = case_when(
      depth_code == "dd" ~ "diep",
      depth_code == "ll" ~ "laag litoraal",
      depth_code == "ml" ~ "midden litoraal",
      depth_code == "hl" ~ "hoog litoraal",
      depth_code == "lt" ~ "litoraal",
      TRUE ~ NA_character_
    )
  ) %>%
  select(-Loc_name, -salt_code, -dyn_code, -depth_code) %>%
  left_join(su20, by = "EXT_REF")

# 2020 density/biomass records: rows whose LOC_CODE starts with "WS", with the
# sampling date parsed ("/"-separated) and the sample metadata (mi20) attached
# by EXT_REF. Column names are lower-cased only after the join.
db20 <- file.path("data", "delivered", "densbiom2020.csv") %>%
  read_csv() %>%
  filter(substr(LOC_CODE, 1, 2) == "WS") %>%
  mutate(date_db = parse_date(DATE_SMP, format = "%d/%m/%Y")) %>%
  left_join(mi20, by = "EXT_REF") %>%
  setNames(tolower(names(.)))


# Reshape the 2020 records to the column names and order used for the
# combined data set.
db20a <- db20 %>%
  # Sample identity, position and date.
  mutate(
    monster_id = ext_ref,
    rd_x = coord_x,
    rd_y = coord_y,
    gebied = "Westerschelde",
    substraat = "Zacht substraat",
    positie = ifelse(diepte_hoo == "diep", "sublitoraal", "litoraal"),
    datum = date_db,
    jaar = year(date_db),
    # Months before August count as "Voorjaar", the rest as "Najaar".
    seizoen = ifelse(month(date_db) < 8, "Voorjaar", "Najaar")
  ) %>%
  # Sampling method, sampled amount and depth. amt_calc.y is the amt_calc
  # column that came in with the joined sample metadata.
  mutate(
    veldproced = smp_app,
    bemonst_op = amt_calc.y,
    diepte_nap = monsdte
  ) %>%
  # Species observations. amt_calc.x is the amt_calc column of the
  # density/biomass file itself.
  mutate(
    soortnaam = par_name,
    aanwezigheid = "aanwezig",
    aantal = amt_calc.x,
    # Non-positive ash-free dry weights are stored as missing.
    asvdrg_mg = ifelse(afdw_calc > 0, afdw_calc, NA),
    dichtheid_n_m2 = aantal / bemonst_op,
    biomassa_mg_m2 = asvdrg_mg / bemonst_op
  ) %>%
  select(all_of(c(
    "monster_id", "rd_x", "rd_y", "gebied", "zout", "substraat", "positie",
    "dynamiek", "diepte_hoo", "substraat_", "datum", "jaar", "seizoen",
    "veldproced", "bemonst_op", "diepte_nap", "soortnaam", "aanwezigheid",
    "aantal", "asvdrg_mg", "dichtheid_n_m2", "biomassa_mg_m2"
  )))

########### join years and add species attributes

# Stack the three yearly tables, drop the "Dolichopodidae" records and rename
# "Eteone longa" to "Eteone". Rows with a missing soortnaam are dropped by
# the filter as well, because the comparison yields NA for them.
wstot <- rbind(db18a, db19a, db20a) %>%
  filter(soortnaam != "Dolichopodidae") %>%
  mutate(soortnaam = ifelse(soortnaam == "Eteone longa", "Eteone", soortnaam))

# Species attribute lookup taken from the historic data. It is reduced to one
# row per soortnaam (the first combination encountered) so that the join
# below can never multiply observation rows when a species occurs with more
# than one attribute combination in the historic data.
tt <- wsold %>%
  select(soortnaam, ned_naam_n, groep, phylum, classis, ordo, familia) %>%
  distinct(soortnaam, .keep_all = TRUE)

wstot <- wstot %>%
  left_join(tt, by = "soortnaam")

######### make joint file

# Append the 2018-2020 records to the historic data; columns present in only
# one of the two tables are filled with NA by bind_rows().
wsnew <- list(wsold, wstot) %>%
  bind_rows()

# Store the combined table both as an R data file and as csv.
processed_dir <- file.path("data", "processed")
save(wsnew, file = file.path(processed_dir, "wsnew.Rdata"))
write_csv(wsnew, file = file.path(processed_dir, "wsnew.csv"))
