#########################################################################################################
# Standardize raw observed data retrieved in the research, Siska Fitrianie, Merijn Bruijnes, Fengxiang Li, 
# Amal Abdulrahman, Willem-Paul Brinkman. 2022. The Artificial-Social-Agent Questionnaire: 
# Establishing the long and short questionnaire versions. In ACM International Conference on Intelligent 
# Virtual Agents (IVA’22), September, 2022, Faro, Portugal. ACM, New York, NY, USA.
# https://doi.org/10.1145/3514197.3549612
#
# Run CFA of two simulation datasets n=532
# Input:  result_all.csv
# Output: result_all_pItem_std.csv
# The script also computes descriptive statistics of the datasets per agent (14 agents)
#########################################################################################################

## Library
library(dplyr) 
library(psych)
library(crayon)
library(CTT)

## Retrieve raw data
# Agent names handed to standardize_pItem(); they are compared against the
# first column of d_results.
# NOTE(review): "POPPIE" here differs from the per-agent file name
# "result_POPPY.csv" -- confirm which spelling the combined data set uses.
agents <- c("AIBO", "AMY", "CHAPPIE", "DEEPBLUE","DOG","FURBY","HAL 9000","iCAT","NAO", "POPPIE","SIM SENSEI", "SIRI", "SARAH","MARCUS")

# All raw files live in the same directory and are semicolon-separated.
raw_dir <- "../raw data"
read_raw <- function(file_name) {
  read.csv2(file.path(raw_dir, file_name), header = TRUE, sep = ";")
}

# Combined data set. The previous path "../raw ../raw data/result_all.csv"
# contained a duplicated fragment and did not match the directory used by
# every other read below.
d_results_all <- read_raw("result_all.csv")

# Remove bookkeeping columns; drop = FALSE keeps the result a data frame even
# if only a single column were to remain.
drop <- c("PRID","RID","STARTDATE","ENDDATE","RECORDDATE","CheckScore")
d_results <- d_results_all[, !(names(d_results_all) %in% drop), drop = FALSE]

# One data set per agent
d_AIBO <- read_raw("result_AIBO.csv")
d_AMY <- read_raw("result_AMY.csv")
d_CHAPPIE <- read_raw("result_CHAPPIE.csv")
d_DEEPBLUE <- read_raw("result_DEEPBLUE.csv")
d_DOG <- read_raw("result_DOG.csv")
d_FURBY <- read_raw("result_FURBY.csv")
d_HAL_9000 <- read_raw("result_HAL 9000.csv")
d_iCAT <- read_raw("result_iCAT.csv")
d_MARCUS <- read_raw("result_MARCUS.csv")
d_NAO <- read_raw("result_NAO.csv")
d_POPPY <- read_raw("result_POPPY.csv")
d_SARAH <- read_raw("result_SARAH.csv")
d_SIMSENSEI <- read_raw("result_SIM SENSEI.csv")
d_SIRI <- read_raw("result_SIRI.csv")

## Get datasets' statistical description
# psych::describe() summary of one data set, rounded to two decimals and
# returned as a plain data frame.
describe_rounded <- function(d) {
  data.frame(round(describe(d), 2))
}

desc_AIBO <- describe_rounded(d_AIBO)
desc_AMY <- describe_rounded(d_AMY)
desc_CHAPPIE <- describe_rounded(d_CHAPPIE)
desc_DEEPBLUE <- describe_rounded(d_DEEPBLUE)
desc_DOG <- describe_rounded(d_DOG)
desc_FURBY <- describe_rounded(d_FURBY)
desc_HAL_9000 <- describe_rounded(d_HAL_9000)
desc_iCAT <- describe_rounded(d_iCAT)
desc_MARCUS <- describe_rounded(d_MARCUS)
desc_NAO <- describe_rounded(d_NAO)
desc_POPPY <- describe_rounded(d_POPPY)
desc_SARAH <- describe_rounded(d_SARAH)
desc_SIMSENSEI <- describe_rounded(d_SIMSENSEI)
desc_SIRI <- describe_rounded(d_SIRI)

## Standardize data
# The observed data is standardized so that ratings can be compared between
# different ASAs (agents).
# Standardization procedure:
# 1. calculate the mean and the standard deviation per item per ASA,
# 2. from each observed value of an item, subtract the mean of that item for
#    the corresponding ASA,
# 3. divide the result by the standard deviation of that item for the
#    corresponding ASA.
#
# Arguments:
#   a   vector of agent names; every agent occurring in the first column of
#       d_ must be listed here (duplicates are ignored).
#   d_  data frame whose first column holds the agent name of each row and
#       whose remaining columns hold the item ratings.
# Value:
#   data frame with nrow(d_) rows and ncol(d_) - 1 columns, named after the
#   item columns of d_, containing the standardized ratings.
# Notes:
#   - Any number of item columns is accepted (no fixed column count).
#   - mean() and sd() are used without na.rm, so a missing rating makes the
#     standardized values of that item NA for the whole agent.
#   - An agent with a single row (sd is NA) or with constant ratings on an
#     item (sd is 0) yields NA/NaN/Inf for that item, as plain division does.
##########
standardize_pItem <- function(a, d_) {
  stopifnot(is.data.frame(d_), ncol(d_) >= 2)

  a <- unique(a)
  # Position in `a` of the agent of each row; NA marks an agent not in `a`.
  agent_idx <- match(d_[[1]], a)

  unknown <- unique(d_[[1]][is.na(agent_idx)])
  if (length(unknown) > 0) {
    stop(
      "agent(s) found in the data but not listed in `a`: ",
      paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }

  # Report requested agents that have no rows at all (once per agent).
  for (agent in a[!(seq_along(a) %in% agent_idx)]) {
    print(paste("agent ", agent, " is empty", sep=" "))
  }

  # Grouping factor with one level per requested agent, so the per-agent
  # statistics computed below can be indexed directly by agent_idx.
  grp <- factor(agent_idx, levels = seq_along(a))

  z_score <- function(x) {
    m <- as.vector(tapply(x, grp, mean))
    s <- as.vector(tapply(x, grp, sd))
    (x - m[agent_idx]) / s[agent_idx]
  }

  # Replace every item column by its standardized version in one pass; the
  # column names of d_ are carried over unchanged.
  d_standard <- as.data.frame(d_[-1])
  d_standard[] <- lapply(d_standard, z_score)
  rownames(d_standard) <- NULL
  return(d_standard)
}

## Standardize the raw ratings per item within each agent
d_results_pItem_std <- standardize_pItem(a = agents, d_ = d_results)

## Write the standardized data set to disk (without row names)
write.csv(
  x = d_results_pItem_std,
  file = "../r_results/result_all_pItem_std.csv",
  row.names = FALSE
)
